From nobody Thu Oct 2 03:27:49 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 744B62C187; Mon, 22 Sep 2025 22:17:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579430; cv=none; b=tliK7nPTInjmVDUKcBfiq1rdHrOGVqGbf2lP83M3u91oYiBCs+XiUTmM9KUksQ9zYrSvcRSWz8rU7wYd/W0Va3cswk1PTddcB0ab9sWiY5u9SuAzYzLq/DXI8fSFVJE9LZQ8+bHh4PaTq2VtYjhfiEwGCiU+YoeYT64NN6NFdBE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579430; c=relaxed/simple; bh=b4kaadpAcdfkSSYI/s+61GIJ+vRzjMb2Vl3FcGUWroQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=HN97pk0GECf5mUW6esFQh5ZvRhYS69CkpnQZu+mqwx4neOJdZp0ZYW9WBuS0zDW/TZhRQp8XbedJKu6fm1szuX1anYE7iWKweFzl9JLVpCJ9e0SlqCl7UQxSUVzhQZ9Er5f0JMWC+rEw3HUPzukp4rIUqeo/hj4hQJ11Vx1OYoM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=fEAeba1t; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="fEAeba1t" Received: from simon-Latitude-5450.fritz.box (p5dc88066.dip0.t-ipconnect.de [93.200.128.102]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.10/8.18.1.10) with ESMTPSA id 58MMH4eZ003919 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 23 Sep 
2025 00:17:05 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1758579425; bh=b4kaadpAcdfkSSYI/s+61GIJ+vRzjMb2Vl3FcGUWroQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=fEAeba1tfJ56369/2xqYr4I37/XwVEmIsC864tDkXcruGMD141FvjKMPB5mxl3nAe btqT3dYpMARuaWFhGbG5fSqeB4ZXIziIkqCITsO5TOHP+Bx7C6+PmQ2k+1JpOyp+5j B5ZMIGiq5rFZHRT7xjhJ2pD2QS0e1zbnuXuylv4g= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, leiyang@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH net-next v5 1/8] __ptr_ring_full_next: Returns if ring will be full after next insertion Date: Tue, 23 Sep 2025 00:15:46 +0200 Message-ID: <20250922221553.47802-2-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> References: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Useful if the caller would like to act before the ptr_ring gets full after the next __ptr_ring_produce call. Because __ptr_ring_produce has a smp_wmb(), taking action before ensures memory ordering. Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- include/linux/ptr_ring.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 551329220e4f..c45e95071d7e 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -96,6 +96,28 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) return ret; } =20 +/* Returns if the ptr_ring will be full after inserting the next ptr. 
+ */ +static inline bool __ptr_ring_full_next(struct ptr_ring *r) +{ + int p; + + /* Since __ptr_ring_discard_one invalidates in reverse order, the + * next producer entry might be NULL even though the current one + * is not. Therefore, also check the current producer entry with + * __ptr_ring_full. + */ + if (unlikely(r->size <=3D 1 || __ptr_ring_full(r))) + return true; + + p =3D r->producer + 1; + + if (unlikely(p >=3D r->size)) + p =3D 0; + + return r->queue[p]; +} + /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. * Callers are responsible for making sure pointer that is being queued --=20 2.43.0 From nobody Thu Oct 2 03:27:49 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EC0D6255222; Mon, 22 Sep 2025 22:17:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579431; cv=none; b=ks65gAj/TD9X8vdSm8a0Izo10DCGCrNoJG4bQOe8jXFsr3txfgrJqq4McETgOa0lf5vtaq3JJ9DMc7oAl48PKJmeGVwgP6rT16G533sOAtZywU3GWkLP/9RkCrejTZ3uCW8qn0tgtz9+entQarbBjnjUFozlMD5/rj3A/1/nCPk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579431; c=relaxed/simple; bh=QoqqyIDfhdjjDER70xfzGzFdU7AlvujyerC0bCmeXjE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=iZW4A5OPs/zbvy2Tzp/af5HSvXfCp6UQ03Vxdr8RepxX8nfQ1p37DEQLpcVoOg5HKAz+Wf6QKMGR/WBPhCiauW+b5es9j5n8hXBEJ7dIchLdJ865L0KKNp7kCp7GWf3ku+empkkDlApwTewiMkYFj6GK+6F9b9YWq/yBlF/5vqQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de 
header.i=@tu-dortmund.de header.b=suuYwj43; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="suuYwj43" Received: from simon-Latitude-5450.fritz.box (p5dc88066.dip0.t-ipconnect.de [93.200.128.102]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.10/8.18.1.10) with ESMTPSA id 58MMH4eb003919 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 23 Sep 2025 00:17:05 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1758579426; bh=QoqqyIDfhdjjDER70xfzGzFdU7AlvujyerC0bCmeXjE=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=suuYwj43EAFxjzMLFJkrca9BflhoZwAiR77pUapz/qtLEdGQXBo/bRZrtFj/TzzfM Y4lh+FRTEGqg6cGM1T2d7PZQ0myO3BH1j5J6kHcxYkUp1pYeEXtCh3nzB2ht6SGMpW t4k9Wx+Ac+fGfNqTZJMjTH/1a2C4UUt1ONesWRxw= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, leiyang@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH net-next v5 2/8] Move the decision of invalidation out of __ptr_ring_discard_one Date: Tue, 23 Sep 2025 00:15:47 +0200 Message-ID: <20250922221553.47802-3-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> References: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" 
__ptr_ring_will_invalidate is useful if the caller would like to act before entries of the ptr_ring get invalidated by __ptr_ring_discard_one. __ptr_ring_consume calls the new method and passes the result to __ptr_ring_discard_one, preserving the pre-patch logic. Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- include/linux/ptr_ring.h | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index c45e95071d7e..78fb3efedc7a 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -266,7 +266,22 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *= r) } =20 /* Must only be called after __ptr_ring_peek returned !NULL */ -static inline void __ptr_ring_discard_one(struct ptr_ring *r) +static inline bool __ptr_ring_will_invalidate(struct ptr_ring *r) +{ + /* Once we have processed enough entries invalidate them in + * the ring all at once so producer can reuse their space in the ring. + * We also do this when we reach end of the ring - not mandatory + * but helps keep the implementation simple. + */ + int consumer_head =3D r->consumer_head + 1; + + return consumer_head - r->consumer_tail >=3D r->batch || + consumer_head >=3D r->size; +} + +/* Must only be called after __ptr_ring_peek returned !NULL */ +static inline void __ptr_ring_discard_one(struct ptr_ring *r, + bool invalidate) { /* Fundamentally, what we want to do is update consumer * index and zero out the entry so producer can reuse it. @@ -286,13 +301,7 @@ static inline void __ptr_ring_discard_one(struct ptr_r= ing *r) int consumer_head =3D r->consumer_head; int head =3D consumer_head++; =20 - /* Once we have processed enough entries invalidate them in - * the ring all at once so producer can reuse their space in the ring. - * We also do this when we reach end of the ring - not mandatory - * but helps keep the implementation simple. 
- */ - if (unlikely(consumer_head - r->consumer_tail >=3D r->batch || - consumer_head >=3D r->size)) { + if (unlikely(invalidate)) { /* Zero out entries in the reverse order: this way we touch the * cache line that producer might currently be reading the last; * producer won't make progress and touch other cache lines @@ -312,6 +321,7 @@ static inline void __ptr_ring_discard_one(struct ptr_ri= ng *r) =20 static inline void *__ptr_ring_consume(struct ptr_ring *r) { + bool invalidate; void *ptr; =20 /* The READ_ONCE in __ptr_ring_peek guarantees that anyone @@ -319,8 +329,10 @@ static inline void *__ptr_ring_consume(struct ptr_ring= *r) * with smp_wmb in __ptr_ring_produce. */ ptr =3D __ptr_ring_peek(r); - if (ptr) - __ptr_ring_discard_one(r); + if (ptr) { + invalidate =3D __ptr_ring_will_invalidate(r); + __ptr_ring_discard_one(r, invalidate); + } =20 return ptr; } --=20 2.43.0 From nobody Thu Oct 2 03:27:49 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 78FF1275B13; Mon, 22 Sep 2025 22:17:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579431; cv=none; b=YQl76CwhKSmKnVCErprsrmbykDNR+FBHJuS2lf8rOmgymhxBDqFRRUjHL+GbF4nHpaGNLjCK4xpVcGRNb+FHPXfuY8Yhgy5W2kCVtBgOHzk+rgRmw266aBn6U58DJvInQ7m+zl+JGtKqW+HjSnsmu6jLJ7Pnh5ZD/PyaGU1dflo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579431; c=relaxed/simple; bh=REBZqS6e4wRdWD+7hW/SpQ4itfFDNainpjcwtocQKbA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=FV3A4BX/9qXY0y58AJnStwu8fFpzi2PXUPSrc0qHmcPYqWb5hk+BuDJI6RFFcUQsPq8NcAcT2ZU4bzQhvhQ6qe531hjHwhcdKQjXknWbrIiGnXM7w/N8ELiq2kPvGfB/7RynsGx5lTbHVxzaYtQ0d/Q5KpkaACIWMqhm4oen5js= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=AcPd6d97; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="AcPd6d97" Received: from simon-Latitude-5450.fritz.box (p5dc88066.dip0.t-ipconnect.de [93.200.128.102]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.10/8.18.1.10) with ESMTPSA id 58MMH4ed003919 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 23 Sep 2025 00:17:06 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1758579426; bh=REBZqS6e4wRdWD+7hW/SpQ4itfFDNainpjcwtocQKbA=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=AcPd6d97+fFDBsnDH4VaeUdvo/pL9mBI6aTNg0ahTO50boBqOQ4D1Say40qTU6qf/ lZDydUgmGDWRkYRavJWkyFGcBz6/d4Q0xfHd/VkUb5vBxTmFcpSYkrGYHZrD8fkxDl 2hiylrD27wXJdrv8/I+Q9gtouuCA59moY/CQiWLI= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, leiyang@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH net-next v5 3/8] TUN, TAP & vhost_net: Stop netdev queue before reaching a full ptr_ring Date: Tue, 23 Sep 2025 00:15:48 +0200 Message-ID: <20250922221553.47802-4-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> References: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> Precedence: 
bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Stop the netdev queue ahead of __ptr_ring_produce when __ptr_ring_full_next signals the ring is about to fill. Due to the smp_wmb() of __ptr_ring_produce the consumer is guaranteed to be able to notice the stopped netdev queue after seeing the new ptr_ring entry. As both __ptr_ring_full_next and __ptr_ring_produce need the producer_lock, the lock is held during the execution of both methods. dev->lltx is disabled to ensure that tun_net_xmit is not called even though the netdev queue is stopped (which happened in my testing, resulting in rare packet drops). Consequently, the update of trans_start in tun_net_xmit is also removed. Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/net/tun.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 86a9e927d0ff..c6b22af9bae8 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -931,7 +931,7 @@ static int tun_net_init(struct net_device *dev) dev->vlan_features =3D dev->features & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); - dev->lltx =3D true; + dev->lltx =3D false; =20 tun->flags =3D (tun->flags & ~TUN_FEATURES) | (ifr->ifr_flags & TUN_FEATURES); @@ -1060,14 +1060,18 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb= , struct net_device *dev) =20 nf_reset_ct(skb); =20 - if (ptr_ring_produce(&tfile->tx_ring, skb)) { + queue =3D netdev_get_tx_queue(dev, txq); + + spin_lock(&tfile->tx_ring.producer_lock); + if (__ptr_ring_full_next(&tfile->tx_ring)) + netif_tx_stop_queue(queue); + + if (unlikely(__ptr_ring_produce(&tfile->tx_ring, skb))) { + spin_unlock(&tfile->tx_ring.producer_lock); drop_reason =3D SKB_DROP_REASON_FULL_RING; goto drop; } - - /* dev->lltx requires to do our own update of 
trans_start */ - queue =3D netdev_get_tx_queue(dev, txq); - txq_trans_cond_update(queue); + spin_unlock(&tfile->tx_ring.producer_lock); =20 /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) --=20 2.43.0 From nobody Thu Oct 2 03:27:49 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7906227876A; Mon, 22 Sep 2025 22:17:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579432; cv=none; b=uIzEzO++XBLJSUMPi4vRpF620FGe5gbRzEy+zj7FUWnfAtOl71cxXQRUEqRhP5TCOHpJYmooGHom6ojoy4/DQdXm/8cZmfqkcUNh5FcSeegCZy4x+iA4SUsrpyQ13BYPOTCwVwqK2xFmKfnHdKoYzaCQRzAzHUOagvbtMqSoZK4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579432; c=relaxed/simple; bh=V+PhwFYWszbBXgU4tk0LQIa75F+W369ypV6Mf0SMujk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=R3Czx79+vBKmm2wLRfUBU7gSuuOnKSK79ma9bl6nQKlnsaWmEOlJa6F8UAkF+8asfd5xEsWWORWjOqfqz7PAsPES3s7V5LC+rFrh+PMBUfLZvDOVY4yM2omgdiksRimvIX6PBC9CNrwTj7fVciAY9DPYEqtMJQqJvSAtKL6ycZI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=qHOopKzw; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="qHOopKzw" Received: from simon-Latitude-5450.fritz.box 
(p5dc88066.dip0.t-ipconnect.de [93.200.128.102]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.10/8.18.1.10) with ESMTPSA id 58MMH4ef003919 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 23 Sep 2025 00:17:06 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1758579426; bh=V+PhwFYWszbBXgU4tk0LQIa75F+W369ypV6Mf0SMujk=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=qHOopKzw0deAfiXZsLK/nqv7nyVPFWw4uKo3c1M6/LnHpjmn52MQkI7JABdQQopah MOvru5fzwIbah4vSFj41z1ym68i2LZo+CevKfEVv4cl/YT9sk496iVDEM2NQh54Ftz q2aJddPTvE7f57kq+ay9GJwrsdwtcFIVK+YcV58o= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, leiyang@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH net-next v5 4/8] TUN & TAP: Wake netdev queue after consuming an entry Date: Tue, 23 Sep 2025 00:15:49 +0200 Message-ID: <20250922221553.47802-5-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> References: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The new wrappers tun_ring_consume/tap_ring_consume deal with consuming an entry of the ptr_ring and then waking the netdev queue when entries got invalidated to be used again by the producer. To avoid waking the netdev queue when the ptr_ring is full, it is checked if the netdev queue is stopped before invalidating entries. Like that the netdev queue can be safely woken after invalidating entries. 
The READ_ONCE in __ptr_ring_peek, paired with the smp_wmb() in __ptr_ring_produce within tun_net_xmit guarantees that the information about the netdev queue being stopped is visible after __ptr_ring_peek is called. The netdev queue is also woken after resizing the ptr_ring. Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/net/tap.c | 44 +++++++++++++++++++++++++++++++++++++++++++- drivers/net/tun.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 1197f245e873..f8292721a9d6 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -753,6 +753,46 @@ static ssize_t tap_put_user(struct tap_queue *q, return ret ? ret : total; } =20 +static struct sk_buff *tap_ring_consume(struct tap_queue *q) +{ + struct netdev_queue *txq; + struct net_device *dev; + bool will_invalidate; + bool stopped; + void *ptr; + + spin_lock(&q->ring.consumer_lock); + ptr =3D __ptr_ring_peek(&q->ring); + if (!ptr) { + spin_unlock(&q->ring.consumer_lock); + return ptr; + } + + /* Check if the queue stopped before zeroing out, so no ptr get + * produced in the meantime, because this could result in waking + * even though the ptr_ring is full. The order of the operations + * is ensured by barrier(). 
+ */ + will_invalidate =3D __ptr_ring_will_invalidate(&q->ring); + if (unlikely(will_invalidate)) { + rcu_read_lock(); + dev =3D rcu_dereference(q->tap)->dev; + txq =3D netdev_get_tx_queue(dev, q->queue_index); + stopped =3D netif_tx_queue_stopped(txq); + } + barrier(); + __ptr_ring_discard_one(&q->ring, will_invalidate); + + if (unlikely(will_invalidate)) { + if (stopped) + netif_tx_wake_queue(txq); + rcu_read_unlock(); + } + spin_unlock(&q->ring.consumer_lock); + + return ptr; +} + static ssize_t tap_do_read(struct tap_queue *q, struct iov_iter *to, int noblock, struct sk_buff *skb) @@ -774,7 +814,7 @@ static ssize_t tap_do_read(struct tap_queue *q, TASK_INTERRUPTIBLE); =20 /* Read frames from the queue */ - skb =3D ptr_ring_consume(&q->ring); + skb =3D tap_ring_consume(q); if (skb) break; if (noblock) { @@ -1207,6 +1247,8 @@ int tap_queue_resize(struct tap_dev *tap) ret =3D ptr_ring_resize_multiple_bh(rings, n, dev->tx_queue_len, GFP_KERNEL, __skb_array_destroy_skb); + if (netif_running(dev)) + netif_tx_wake_all_queues(dev); =20 kfree(rings); return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index c6b22af9bae8..682df8157b55 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2114,13 +2114,53 @@ static ssize_t tun_put_user(struct tun_struct *tun, return total; } =20 +static void *tun_ring_consume(struct tun_file *tfile) +{ + struct netdev_queue *txq; + struct net_device *dev; + bool will_invalidate; + bool stopped; + void *ptr; + + spin_lock(&tfile->tx_ring.consumer_lock); + ptr =3D __ptr_ring_peek(&tfile->tx_ring); + if (!ptr) { + spin_unlock(&tfile->tx_ring.consumer_lock); + return ptr; + } + + /* Check if the queue stopped before zeroing out, so no ptr get + * produced in the meantime, because this could result in waking + * even though the ptr_ring is full. The order of the operations + * is ensured by barrier(). 
+ */ + will_invalidate =3D __ptr_ring_will_invalidate(&tfile->tx_ring); + if (unlikely(will_invalidate)) { + rcu_read_lock(); + dev =3D rcu_dereference(tfile->tun)->dev; + txq =3D netdev_get_tx_queue(dev, tfile->queue_index); + stopped =3D netif_tx_queue_stopped(txq); + } + barrier(); + __ptr_ring_discard_one(&tfile->tx_ring, will_invalidate); + + if (unlikely(will_invalidate)) { + if (stopped) + netif_tx_wake_queue(txq); + rcu_read_unlock(); + } + spin_unlock(&tfile->tx_ring.consumer_lock); + + return ptr; +} + static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) { DECLARE_WAITQUEUE(wait, current); void *ptr =3D NULL; int error =3D 0; =20 - ptr =3D ptr_ring_consume(&tfile->tx_ring); + ptr =3D tun_ring_consume(tfile); if (ptr) goto out; if (noblock) { @@ -2132,7 +2172,7 @@ static void *tun_ring_recv(struct tun_file *tfile, in= t noblock, int *err) =20 while (1) { set_current_state(TASK_INTERRUPTIBLE); - ptr =3D ptr_ring_consume(&tfile->tx_ring); + ptr =3D tun_ring_consume(tfile); if (ptr) break; if (signal_pending(current)) { @@ -3621,6 +3661,9 @@ static int tun_queue_resize(struct tun_struct *tun) dev->tx_queue_len, GFP_KERNEL, tun_ptr_free); =20 + if (netif_running(dev)) + netif_tx_wake_all_queues(dev); + kfree(rings); return ret; } --=20 2.43.0 From nobody Thu Oct 2 03:27:49 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BAB7227A906; Mon, 22 Sep 2025 22:17:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579433; cv=none; b=M8DjUKBR6kEdZBYZtPXhJH133PvU0SBHpcSn8+1pm9S4cxr9abSAmnh+f/9HFwu9+DprPbk+ddNSHvZd4yemPwJIM0UwYV2JqWM+YvleIvxkGTpxFMO7+gCvqaeTKb5L/fwgF4g2izRq+80mqHHJ/9UsNzUCkHvgxdM71EIeoc0= ARC-Message-Signature: i=1; 
a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579433; c=relaxed/simple; bh=5PuVBkf+qGmZlYXb2NZVJTfDHMzFZGPoycz3QdxG9Cs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=K+ErwhgMlq1R9zeuS37VA/wTGx3syLZ9R08KGIEmBwnl6k/4XGYA0BlO/+gZFFdZwDPpZYzLF3WoM01zYWmhyNqFytqXULNVADq3jhFDjTPHE77dXKPGl6YB6JBPeJdtRriQ/1r8RQJAz4eU8rk0bzMsDH7ZfuVGuZauDC7l+H4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=P7Jj/jJe; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="P7Jj/jJe" Received: from simon-Latitude-5450.fritz.box (p5dc88066.dip0.t-ipconnect.de [93.200.128.102]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.10/8.18.1.10) with ESMTPSA id 58MMH4eh003919 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 23 Sep 2025 00:17:06 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1758579427; bh=5PuVBkf+qGmZlYXb2NZVJTfDHMzFZGPoycz3QdxG9Cs=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=P7Jj/jJeG/6RXXDI8IKdmk9mUIroKPkjQ8DpWsA/hZK/+3SgUvGHbBHcFbFumLonG A2C5s7LXaCfDDXzTmhDgGLzVWtZCm3Wto2yQkuyTatzkWHfS2wXY9XOaStXVBhXJNj +T+zrR71NPIZW1jrMfpNX0U0SYD2dM01afJZvHKY= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, leiyang@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: 
[PATCH net-next v5 5/8] TUN & TAP: Provide ptr_ring_consume_batched wrappers for vhost_net Date: Tue, 23 Sep 2025 00:15:50 +0200 Message-ID: <20250922221553.47802-6-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> References: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The wrappers tun_ring_consume_batched/tap_ring_consume_batched are similar to the wrappers tun_ring_consume/tap_ring_consume. They consume a batch of entries from the ptr_ring and then wake the netdev queue whenever entries get invalidated, i.e. made available to the producer again. To avoid waking the netdev queue when the ptr_ring is full, the wrappers check whether the netdev queue is stopped before invalidating entries. That way, the netdev queue can be safely woken after invalidating entries. The READ_ONCE in __ptr_ring_peek, paired with the smp_wmb() in __ptr_ring_produce within tun_net_xmit, guarantees that the information about the netdev queue being stopped is visible after __ptr_ring_peek is called. 
Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/net/tap.c | 52 ++++++++++++++++++++++++++++++++++++++++ drivers/net/tun.c | 54 ++++++++++++++++++++++++++++++++++++++++++ include/linux/if_tap.h | 6 +++++ include/linux/if_tun.h | 7 ++++++ 4 files changed, 119 insertions(+) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index f8292721a9d6..651d48612329 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1216,6 +1216,58 @@ struct socket *tap_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tap_get_socket); =20 +int tap_ring_consume_batched(struct file *file, + void **array, int n) +{ + struct tap_queue *q =3D file->private_data; + struct netdev_queue *txq; + struct net_device *dev; + bool will_invalidate; + bool stopped; + void *ptr; + int i; + + spin_lock(&q->ring.consumer_lock); + ptr =3D __ptr_ring_peek(&q->ring); + + if (!ptr) { + spin_unlock(&q->ring.consumer_lock); + return 0; + } + + i =3D 0; + do { + /* Check if the queue stopped before zeroing out, so no + * ptr get produced in the meantime, because this could + * result in waking even though the ptr_ring is full. + * The order of the operations is ensured by barrier(). 
+ */ + will_invalidate =3D __ptr_ring_will_invalidate(&q->ring); + if (unlikely(will_invalidate)) { + rcu_read_lock(); + dev =3D rcu_dereference(q->tap)->dev; + txq =3D netdev_get_tx_queue(dev, q->queue_index); + stopped =3D netif_tx_queue_stopped(txq); + } + barrier(); + __ptr_ring_discard_one(&q->ring, will_invalidate); + + if (unlikely(will_invalidate)) { + if (stopped) + netif_tx_wake_queue(txq); + rcu_read_unlock(); + } + + array[i++] =3D ptr; + if (i >=3D n) + break; + } while ((ptr =3D __ptr_ring_peek(&q->ring))); + spin_unlock(&q->ring.consumer_lock); + + return i; +} +EXPORT_SYMBOL_GPL(tap_ring_consume_batched); + struct ptr_ring *tap_get_ptr_ring(struct file *file) { struct tap_queue *q; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 682df8157b55..7566b22780fb 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3759,6 +3759,60 @@ struct socket *tun_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tun_get_socket); =20 +int tun_ring_consume_batched(struct file *file, + void **array, int n) +{ + struct tun_file *tfile =3D file->private_data; + struct netdev_queue *txq; + struct net_device *dev; + bool will_invalidate; + bool stopped; + void *ptr; + int i; + + spin_lock(&tfile->tx_ring.consumer_lock); + ptr =3D __ptr_ring_peek(&tfile->tx_ring); + + if (!ptr) { + spin_unlock(&tfile->tx_ring.consumer_lock); + return 0; + } + + i =3D 0; + do { + /* Check if the queue stopped before zeroing out, so no + * ptr get produced in the meantime, because this could + * result in waking even though the ptr_ring is full. + * The order of the operations is ensured by barrier(). 
+ */ + will_invalidate =3D + __ptr_ring_will_invalidate(&tfile->tx_ring); + if (unlikely(will_invalidate)) { + rcu_read_lock(); + dev =3D rcu_dereference(tfile->tun)->dev; + txq =3D netdev_get_tx_queue(dev, + tfile->queue_index); + stopped =3D netif_tx_queue_stopped(txq); + } + barrier(); + __ptr_ring_discard_one(&tfile->tx_ring, will_invalidate); + + if (unlikely(will_invalidate)) { + if (stopped) + netif_tx_wake_queue(txq); + rcu_read_unlock(); + } + + array[i++] =3D ptr; + if (i >=3D n) + break; + } while ((ptr =3D __ptr_ring_peek(&tfile->tx_ring))); + spin_unlock(&tfile->tx_ring.consumer_lock); + + return i; +} +EXPORT_SYMBOL_GPL(tun_ring_consume_batched); + struct ptr_ring *tun_get_tx_ring(struct file *file) { struct tun_file *tfile; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 553552fa635c..2e5542d6aef4 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -11,6 +11,7 @@ struct socket; #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); struct ptr_ring *tap_get_ptr_ring(struct file *file); +int tap_ring_consume_batched(struct file *file, void **array, int n); #else #include #include @@ -22,6 +23,11 @@ static inline struct ptr_ring *tap_get_ptr_ring(struct f= ile *f) { return ERR_PTR(-EINVAL); } +static inline int tap_ring_consume_batched(struct file *f, + void **array, int n) +{ + return 0; +} #endif /* CONFIG_TAP */ =20 /* diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 80166eb62f41..5b41525ac007 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -22,6 +22,7 @@ struct tun_msg_ctl { #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); +int tun_ring_consume_batched(struct file *file, void **array, int n); =20 static inline bool tun_is_xdp_frame(void *ptr) { @@ -55,6 +56,12 @@ static inline struct ptr_ring *tun_get_tx_ring(struct fi= le *f) return ERR_PTR(-EINVAL); } =20 
+static inline int tun_ring_consume_batched(struct file *file, + void **array, int n) +{ + return 0; +} + static inline bool tun_is_xdp_frame(void *ptr) { return false; --=20 2.43.0 From nobody Thu Oct 2 03:27:49 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 46CD627CCC4; Mon, 22 Sep 2025 22:17:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579432; cv=none; b=F8Js72J7/SX3cvmbuX1GAmWPH2n2guP2fZCnDBFlYbCRa9vzObHdQf8/BTWl0+gR5PuSwoTCJ38p8AONetgZBmk40uiENCL3RAtzkSeVN9PWdys8P+cWB02G6eMoHnSRQXSV7qnZWSTePnYm/8rx8dxgQU3fqmKskkDMg9sPvM4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579432; c=relaxed/simple; bh=FQr4aqspFo4pzDG4qZKffIA+U/BJaMqCuzjHpZXtlkU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=CwaJqoFX6yuXYhOUIBV7LCog6EOBzUxq3TOFljCpFA5oKO5YWCQysXgZtqpQWx73+PAelXTZV3U1vEQKzbQPtPrKvbAMRsmHGdZBsh3A4AiN27kl3kIu7dt7UAcBmIFesuX+8xtbXHYtA6rk92mb4JEut8TcLu+zgwiSaVEUk/8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=KLvgS0qH; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="KLvgS0qH" Received: from simon-Latitude-5450.fritz.box (p5dc88066.dip0.t-ipconnect.de [93.200.128.102]) 
(authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.10/8.18.1.10) with ESMTPSA id 58MMH4ej003919 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 23 Sep 2025 00:17:07 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1758579427; bh=FQr4aqspFo4pzDG4qZKffIA+U/BJaMqCuzjHpZXtlkU=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=KLvgS0qH1N3gQlDkECju6/5BCwkyyM9POm4wwTtVUgXvhUEsm7WSxXocUxzUJhiQZ lpZ22VhN5NBWdZ4nXYpuGv+uacEw4h6x45mS0jtoqvzXeVIxRKVe6yuRCyzH5mz7ln kek1ekbNnUC7RQEqlBUyBtnJ5Dckx/il8fV8GHkQ= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, leiyang@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH net-next v5 6/8] TUN & TAP: Provide ptr_ring_unconsume wrappers for vhost_net Date: Tue, 23 Sep 2025 00:15:51 +0200 Message-ID: <20250922221553.47802-7-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> References: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/net/tap.c | 9 +++++++++ drivers/net/tun.c | 9 +++++++++ include/linux/if_tap.h | 4 ++++ include/linux/if_tun.h | 5 +++++ 4 files changed, 27 insertions(+) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 651d48612329..9720481f6d41 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1268,6 +1268,15 @@ int tap_ring_consume_batched(struct file *file, } EXPORT_SYMBOL_GPL(tap_ring_consume_batched); =20 +void 
tap_ring_unconsume(struct file *file, void **batch, int n, + void (*destroy)(void *)) +{ + struct tap_queue *q =3D file->private_data; + + ptr_ring_unconsume(&q->ring, batch, n, destroy); +} +EXPORT_SYMBOL_GPL(tap_ring_unconsume); + struct ptr_ring *tap_get_ptr_ring(struct file *file) { struct tap_queue *q; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7566b22780fb..25b170e903e1 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3813,6 +3813,15 @@ int tun_ring_consume_batched(struct file *file, } EXPORT_SYMBOL_GPL(tun_ring_consume_batched); =20 +void tun_ring_unconsume(struct file *file, void **batch, int n, + void (*destroy)(void *)) +{ + struct tun_file *tfile =3D file->private_data; + + ptr_ring_unconsume(&tfile->tx_ring, batch, n, destroy); +} +EXPORT_SYMBOL_GPL(tun_ring_unconsume); + struct ptr_ring *tun_get_tx_ring(struct file *file) { struct tun_file *tfile; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 2e5542d6aef4..0cf8cf200d52 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -12,6 +12,8 @@ struct socket; struct socket *tap_get_socket(struct file *); struct ptr_ring *tap_get_ptr_ring(struct file *file); int tap_ring_consume_batched(struct file *file, void **array, int n); +void tap_ring_unconsume(struct file *file, void **batch, int n, + void (*destroy)(void *)); #else #include #include @@ -28,6 +30,8 @@ static inline int tap_ring_consume_batched(struct file *f, { return 0; } +static inline void tap_ring_unconsume(struct file *file, void **batch, + int n, void (*destroy)(void *)) {} #endif /* CONFIG_TAP */ =20 /* diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 5b41525ac007..bd954bb117e8 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -23,6 +23,8 @@ struct tun_msg_ctl { struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); int tun_ring_consume_batched(struct file *file, void **array, int n); +void 
tun_ring_unconsume(struct file *file, void **batch, int n, + void (*destroy)(void *)); =20 static inline bool tun_is_xdp_frame(void *ptr) { @@ -62,6 +64,9 @@ static inline int tun_ring_consume_batched(struct file *f= ile, return 0; } =20 +static inline void tun_ring_unconsume(struct file *file, void **batch, + int n, void (*destroy)(void *)) {} + static inline bool tun_is_xdp_frame(void *ptr) { return false; --=20 2.43.0 From nobody Thu Oct 2 03:27:49 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C2AF3285058; Mon, 22 Sep 2025 22:17:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579433; cv=none; b=EWUr6sLHFOTb8d4PAuOJKSwyGCGOmKwIvvi5o6TuDT9aSosnPn3RxJIr7261WQM23w7kDJxcgt7c5mu9XMi/w3ZhhpW+C0mZLU7eNiIXGN4edoL6GZJTA374tbPTd8x4/je/ORxGRA4M2Ld2tdqB7ea1gxdM34rE/4IcLJflKYM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579433; c=relaxed/simple; bh=Afxt95rlvAp2SeBhWrokAWqs1WgjIzNOiYGHEG2MVqM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=S8Ql6a5V0z+05dgeJO2l4TFc1dnrlqPtBX2AG7Qk5W9N8wBodXGBEpSpDLVIVjdYUlajkzKZrkZL1974k+KW65qZgWt4PfoTtHugCDlpKxSLg1rvS9M2LbAtqfm3PFhkL97dFSYlwh7JPChFv70LVlbb8AHgKZqR/xzC7ZutqYY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=AJhE/F+s; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de 
Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="AJhE/F+s" Received: from simon-Latitude-5450.fritz.box (p5dc88066.dip0.t-ipconnect.de [93.200.128.102]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.10/8.18.1.10) with ESMTPSA id 58MMH4el003919 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 23 Sep 2025 00:17:07 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1758579427; bh=Afxt95rlvAp2SeBhWrokAWqs1WgjIzNOiYGHEG2MVqM=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=AJhE/F+sA9PKME6F+dmp1BbEb9uAFG/REWZLE5qWkGkMJmrIWVY0Zmg9cvHQxdvND lUliusnBa7TOy+2LUcnp66LFfCIBphQQt9Q/C9rAA6Ou3CqhP1n86RTfSTUR8ZdeaD LBKwGVIIVk/wgpEP52sq6XyNDASu5forIDtSjJZQ= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, leiyang@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH net-next v5 7/8] TUN & TAP: Methods to determine whether file is TUN/TAP for vhost_net Date: Tue, 23 Sep 2025 00:15:52 +0200 Message-ID: <20250922221553.47802-8-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> References: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Those wrappers are inspired by tun_get_tx_ring/tap_get_ptr_ring and replace those methods.
Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/net/tap.c | 10 +++++----- drivers/net/tun.c | 10 +++++----- include/linux/if_tap.h | 5 +++++ include/linux/if_tun.h | 6 ++++++ 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 9720481f6d41..8d3e74330309 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1277,18 +1277,18 @@ void tap_ring_unconsume(struct file *file, void **b= atch, int n, } EXPORT_SYMBOL_GPL(tap_ring_unconsume); =20 -struct ptr_ring *tap_get_ptr_ring(struct file *file) +bool is_tap_file(struct file *file) { struct tap_queue *q; =20 if (file->f_op !=3D &tap_fops) - return ERR_PTR(-EINVAL); + return false; q =3D file->private_data; if (!q) - return ERR_PTR(-EBADFD); - return &q->ring; + return false; + return true; } -EXPORT_SYMBOL_GPL(tap_get_ptr_ring); +EXPORT_SYMBOL_GPL(is_tap_file); =20 int tap_queue_resize(struct tap_dev *tap) { diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 25b170e903e1..b0193b06fedc 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3822,18 +3822,18 @@ void tun_ring_unconsume(struct file *file, void **b= atch, int n, } EXPORT_SYMBOL_GPL(tun_ring_unconsume); =20 -struct ptr_ring *tun_get_tx_ring(struct file *file) +bool is_tun_file(struct file *file) { struct tun_file *tfile; =20 if (file->f_op !=3D &tun_fops) - return ERR_PTR(-EINVAL); + return false; tfile =3D file->private_data; if (!tfile) - return ERR_PTR(-EBADFD); - return &tfile->tx_ring; + return false; + return true; } -EXPORT_SYMBOL_GPL(tun_get_tx_ring); +EXPORT_SYMBOL_GPL(is_tun_file); =20 module_init(tun_init); module_exit(tun_cleanup); diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 0cf8cf200d52..5bbcc8611bf5 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -14,6 +14,7 @@ struct ptr_ring *tap_get_ptr_ring(struct file *file); int tap_ring_consume_batched(struct file *file, void **array, int n); void 
tap_ring_unconsume(struct file *file, void **batch, int n, void (*destroy)(void *)); +bool is_tap_file(struct file *file); #else #include #include @@ -32,6 +33,10 @@ static inline int tap_ring_consume_batched(struct file *= f, } static inline void tap_ring_unconsume(struct file *file, void **batch, int n, void (*destroy)(void *)) {} +static inline bool is_tap_file(struct file *f) +{ + return false; +} #endif /* CONFIG_TAP */ =20 /* diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index bd954bb117e8..869d61898e60 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -25,6 +25,7 @@ struct ptr_ring *tun_get_tx_ring(struct file *file); int tun_ring_consume_batched(struct file *file, void **array, int n); void tun_ring_unconsume(struct file *file, void **batch, int n, void (*destroy)(void *)); +bool is_tun_file(struct file *file); =20 static inline bool tun_is_xdp_frame(void *ptr) { @@ -67,6 +68,11 @@ static inline int tun_ring_consume_batched(struct file *= file, static inline void tun_ring_unconsume(struct file *file, void **batch, int n, void (*destroy)(void *)) {} =20 +static inline bool is_tun_file(struct file *f) +{ + return false; +} + static inline bool tun_is_xdp_frame(void *ptr) { return false; --=20 2.43.0 From nobody Thu Oct 2 03:27:49 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D81C028725A; Mon, 22 Sep 2025 22:17:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1758579433; cv=none; b=j1kqb5+COdeUliU6+DtZ5FG0uB/+akI1goC1mahu504zR+c1Vvavxoia/7JFRszeDACK8zL0nk9CVmmHPkSGJiJ9M5K7unQtaMmbrX7va9XzIdVrlaSAg3C4z0smgV4qzSVmgxaxCcrYclj1e6JFhkvscWyj5ojo7TppU4FpjAY= ARC-Message-Signature: i=1; a=rsa-sha256; 
d=subspace.kernel.org; s=arc-20240116; t=1758579433; c=relaxed/simple; bh=aRqxQBtjU6+rvyZVBu/BLGv3XNOiqN63EK6zK9acjS0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=rBaQix8ihIQArSdvYqui2BJx16TQ/WZZ2xsH0UsW0td4Ks9wVLfGsnDpzr7YUBM1AyoHz5b1mN/hlUBbSb0dpavRFYqZBAJjY9dbSLcezmo5GqOhPoK7AaiX925+fChUCWLbKZhrYkTCSvhk2O2Z00Pzd10hZZ+es+0gtbOBiFw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=E/J15dFW; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="E/J15dFW" Received: from simon-Latitude-5450.fritz.box (p5dc88066.dip0.t-ipconnect.de [93.200.128.102]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.10/8.18.1.10) with ESMTPSA id 58MMH4en003919 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 23 Sep 2025 00:17:07 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1758579428; bh=aRqxQBtjU6+rvyZVBu/BLGv3XNOiqN63EK6zK9acjS0=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=E/J15dFWROga6n6cRlsCwaio02tipl9T9AO72hyiym+6rD/r5r/8GSfOgNGuW6ELT AgKtZOskiEA60dIKWuJbLWQXKLmxDoNWzYZOd/g7pOb4z5qAMm6KzJez933H95/Omu lB6pA6lnN4xHhrHLuz5BiOhVFHhflQYpehDH8c/A= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, leiyang@redhat.com, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH 
net-next v5 8/8] vhost_net: Replace rx_ring with calls of TUN/TAP wrappers Date: Tue, 23 Sep 2025 00:15:53 +0200 Message-ID: <20250922221553.47802-9-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> References: <20250922221553.47802-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Instead of the rx_ring, the virtqueue saves the interface type TUN, TAP (or IF_NONE) to call TUN/TAP wrappers. Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/vhost/net.c | 90 +++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index c6508fe0d5c8..6be17b53cc6c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -127,10 +127,10 @@ struct vhost_net_virtqueue { /* Reference counting for outstanding ubufs. * Protected by vq mutex. Writers must also take device mutex. 
*/ struct vhost_net_ubuf_ref *ubufs; - struct ptr_ring *rx_ring; struct vhost_net_buf rxq; /* Batched XDP buffs */ struct xdp_buff *xdp; + enum if_type {IF_NONE =3D 0, TUN, TAP} type; }; =20 struct vhost_net { @@ -176,24 +176,54 @@ static void *vhost_net_buf_consume(struct vhost_net_b= uf *rxq) return ret; } =20 -static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq) +static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq, + struct sock *sk) { + struct file *file =3D sk->sk_socket->file; struct vhost_net_buf *rxq =3D &nvq->rxq; =20 rxq->head =3D 0; - rxq->tail =3D ptr_ring_consume_batched(nvq->rx_ring, rxq->queue, - VHOST_NET_BATCH); + + switch (nvq->type) { + case TUN: + rxq->tail =3D tun_ring_consume_batched(file, + rxq->queue, VHOST_NET_BATCH); + break; + case TAP: + rxq->tail =3D tap_ring_consume_batched(file, + rxq->queue, VHOST_NET_BATCH); + break; + case IF_NONE: + WARN_ON_ONCE(); + } + return rxq->tail; } =20 -static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) +static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq, + struct socket *sk) { struct vhost_net_buf *rxq =3D &nvq->rxq; - - if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { - ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, - vhost_net_buf_get_size(rxq), - tun_ptr_free); + struct file *file; + + if (sk && !vhost_net_buf_is_empty(rxq)) { + file =3D sk->file; + switch (nvq->type) { + case TUN: + tun_ring_unconsume(file, + rxq->queue + rxq->head, + vhost_net_buf_get_size(rxq), + tun_ptr_free); + break; + case TAP: + tap_ring_unconsume(file, + rxq->queue + rxq->head, + vhost_net_buf_get_size(rxq), + tun_ptr_free); + break; + case IF_NONE: + return; + } rxq->head =3D rxq->tail =3D 0; } } @@ -209,14 +239,15 @@ static int vhost_net_buf_peek_len(void *ptr) return __skb_array_len_with_tag(ptr); } =20 -static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) +static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq, + struct sock *sk) { 
struct vhost_net_buf *rxq =3D &nvq->rxq; =20 if (!vhost_net_buf_is_empty(rxq)) goto out; =20 - if (!vhost_net_buf_produce(nvq)) + if (!vhost_net_buf_produce(nvq, sk)) return 0; =20 out: @@ -998,8 +1029,8 @@ static int peek_head_len(struct vhost_net_virtqueue *r= vq, struct sock *sk) int len =3D 0; unsigned long flags; =20 - if (rvq->rx_ring) - return vhost_net_buf_peek(rvq); + if (rvq->type) + return vhost_net_buf_peek(rvq, sk); =20 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); head =3D skb_peek(&sk->sk_receive_queue); @@ -1207,7 +1238,7 @@ static void handle_rx(struct vhost_net *net) goto out; } busyloop_intr =3D false; - if (nvq->rx_ring) + if (nvq->type) msg.msg_control =3D vhost_net_buf_consume(&nvq->rxq); /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { @@ -1363,7 +1394,7 @@ static int vhost_net_open(struct inode *inode, struct= file *f) n->vqs[i].batched_xdp =3D 0; n->vqs[i].vhost_hlen =3D 0; n->vqs[i].sock_hlen =3D 0; - n->vqs[i].rx_ring =3D NULL; + n->vqs[i].type =3D IF_NONE; vhost_net_buf_init(&n->vqs[i].rxq); } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, @@ -1393,8 +1424,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_= net *n, sock =3D vhost_vq_get_backend(vq); vhost_net_disable_vq(n, vq); vhost_vq_set_backend(vq, NULL); - vhost_net_buf_unproduce(nvq); - nvq->rx_ring =3D NULL; + vhost_net_buf_unproduce(nvq, sock); + nvq->type =3D IF_NONE; mutex_unlock(&vq->mutex); return sock; } @@ -1474,18 +1505,13 @@ static struct socket *get_raw_socket(int fd) return ERR_PTR(r); } =20 -static struct ptr_ring *get_tap_ptr_ring(struct file *file) +static enum if_type get_if_type(struct file *file) { - struct ptr_ring *ring; - ring =3D tun_get_tx_ring(file); - if (!IS_ERR(ring)) - goto out; - ring =3D tap_get_ptr_ring(file); - if (!IS_ERR(ring)) - goto out; - ring =3D NULL; -out: - return ring; + if (is_tap_file(file)) + return TAP; + if (is_tun_file(file)) + return TUN; + return IF_NONE; } =20 static struct socket 
*get_tap_socket(int fd) @@ -1567,7 +1593,7 @@ static long vhost_net_set_backend(struct vhost_net *n= , unsigned index, int fd) =20 vhost_net_disable_vq(n, vq); vhost_vq_set_backend(vq, sock); - vhost_net_buf_unproduce(nvq); + vhost_net_buf_unproduce(nvq, sock); r =3D vhost_vq_init_access(vq); if (r) goto err_used; @@ -1576,9 +1602,9 @@ static long vhost_net_set_backend(struct vhost_net *n= , unsigned index, int fd) goto err_used; if (index =3D=3D VHOST_NET_VQ_RX) { if (sock) - nvq->rx_ring =3D get_tap_ptr_ring(sock->file); + nvq->type =3D get_if_type(sock->file); else - nvq->rx_ring =3D NULL; + nvq->type =3D IF_NONE; } =20 oldubufs =3D nvq->ubufs; --=20 2.43.0