Implement new ring buffer produce and consume functions for tun and tap
drivers that provide lockless producer-consumer synchronization and
netdev queue management to prevent ptr_ring tail drop and permanent
starvation.
- tun_ring_produce(): Produces packets to the ptr_ring with proper memory
barriers and proactively stops the netdev queue when the ring is about
to become full.
- __tun_ring_consume() / __tap_ring_consume(): Internal consume functions
that check if the netdev queue was stopped due to a full ring, and wake
it when space becomes available. Uses memory barriers to ensure proper
ordering between producer and consumer.
- tun_ring_consume() / tap_ring_consume(): Wrapper functions that acquire
the consumer lock before calling the internal consume functions.
Key features:
- Proactive queue stopping using __ptr_ring_full_next() to stop the queue
before it becomes completely full.
- Not stopping the queue when the ptr_ring is full already, because if
the consumer empties all entries in the meantime, stopping the queue
would cause permanent starvation.
- Conditional queue waking using __ptr_ring_consume_created_space() to
wake the queue only when space is actually created in the ring.
- Prevents permanent starvation by ensuring the queue is also woken when
the ring becomes empty, which can happen when racing the producer.
NB: __always_unused on unused functions, to be removed later in the
series to not break bisectability.
Co-developed-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
Signed-off-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
Co-developed-by: Jon Kohler <jon@nutanix.com>
Signed-off-by: Jon Kohler <jon@nutanix.com>
Signed-off-by: Simon Schippers <simon.schippers@tu-dortmund.de>
---
drivers/net/tap.c | 63 +++++++++++++++++++++++++++++
drivers/net/tun.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 164 insertions(+)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 1197f245e873..c370a02789eb 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -753,6 +753,69 @@ static ssize_t tap_put_user(struct tap_queue *q,
return ret ? ret : total;
}
+/*
+ * Consume a packet from the transmit ring. Callers must hold
+ * the consumer_lock of the ptr_ring. If the ring was full and the
+ * queue was stopped, this may wake up the queue if space is created.
+ */
+static void *__tap_ring_consume(struct tap_queue *q)
+{
+ struct ptr_ring *ring = &q->ring;
+ struct netdev_queue *txq;
+ struct net_device *dev;
+ bool stopped;
+ void *ptr;
+
+ ptr = __ptr_ring_peek(ring);
+ if (!ptr)
+ return ptr;
+
+ /* Paired with smp_wmb() in the ring producer path. Ensures we
+ * see any updated netdev queue state caused by a full ring.
+ * Needed for proper synchronization between the ring and the
+ * netdev queue.
+ */
+ smp_rmb();
+ rcu_read_lock();
+ dev = rcu_dereference(q->tap)->dev;
+ txq = netdev_get_tx_queue(dev, q->queue_index);
+ stopped = netif_tx_queue_stopped(txq);
+
+ /* Ensures the read for a stopped queue completes before the
+ * discard, so that we don't miss the window to wake the queue if
+ * needed.
+ */
+ smp_rmb();
+ __ptr_ring_discard_one(ring);
+
+ /* If the queue was stopped (meaning the producer couldn't have
+ * inserted new entries just now), and we have actually created
+ * space in the ring, or the ring is now empty (due to a race
+ * with the producer), then it is now safe to wake the queue.
+ */
+ if (unlikely(stopped &&
+ (__ptr_ring_consume_created_space(ring) ||
+ __ptr_ring_empty(ring)))) {
+ /* Paired with smp_rmb() in the ring producer path. */
+ smp_wmb();
+ netif_tx_wake_queue(txq);
+ }
+ rcu_read_unlock();
+
+ return ptr;
+}
+
+static __always_unused void *tap_ring_consume(struct tap_queue *q)
+{
+ void *ptr;
+
+ spin_lock(&q->ring.consumer_lock);
+ ptr = __tap_ring_consume(q);
+ spin_unlock(&q->ring.consumer_lock);
+
+ return ptr;
+}
+
static ssize_t tap_do_read(struct tap_queue *q,
struct iov_iter *to,
int noblock, struct sk_buff *skb)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8192740357a0..3b9d8d406ff5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -999,6 +999,107 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
return len;
}
+/* Produce a packet into the transmit ring. If the ring becomes full, the
+ * netdev queue is stopped until the consumer wakes it again.
+ */
+static __always_unused int tun_ring_produce(struct ptr_ring *ring,
+ struct netdev_queue *queue,
+ struct sk_buff *skb)
+{
+ int ret;
+
+ spin_lock(&ring->producer_lock);
+
+ /* Pairs with smp_wmb() in __tun_ring_consume/__tap_ring_consume.
+ * Ensures that freed space by the consumer is visible.
+ */
+ smp_rmb();
+
+ /* Do not stop the netdev queue if the ptr_ring is full already.
+ * The consumer could empty out the ptr_ring in the meantime
+ * without noticing the stopped netdev queue, resulting in a
+ * stopped netdev queue and an empty ptr_ring. In this case the
+ * netdev queue would stay stopped forever.
+ */
+ if (unlikely(!__ptr_ring_full(ring) &&
+ __ptr_ring_full_next(ring)))
+ netif_tx_stop_queue(queue);
+
+ /* Note: __ptr_ring_produce has an internal smp_wmb() to synchronize the
+ * state with the consumer. This ensures that after adding an entry to
+ * the ring, any stopped queue state is visible to the consumer after
+ * dequeueing.
+ */
+ ret = __ptr_ring_produce(ring, skb);
+
+ spin_unlock(&ring->producer_lock);
+
+ return ret;
+}
+
+/*
+ * Consume a packet from the transmit ring. Callers must hold
+ * the consumer_lock of the ptr_ring. If the ring was full and the
+ * queue was stopped, this may wake up the queue if space is created.
+ */
+static void *__tun_ring_consume(struct tun_file *tfile)
+{
+ struct ptr_ring *ring = &tfile->tx_ring;
+ struct netdev_queue *txq;
+ struct net_device *dev;
+ bool stopped;
+ void *ptr;
+
+ ptr = __ptr_ring_peek(ring);
+ if (!ptr)
+ return ptr;
+
+ /* Paired with smp_wmb() in the ring producer path. Ensures we
+ * see any updated netdev queue state caused by a full ring.
+ * Needed for proper synchronization between the ring and the
+ * netdev queue.
+ */
+ smp_rmb();
+ rcu_read_lock();
+ dev = rcu_dereference(tfile->tun)->dev;
+ txq = netdev_get_tx_queue(dev, tfile->queue_index);
+ stopped = netif_tx_queue_stopped(txq);
+
+ /* Ensures the read for a stopped queue completes before the
+ * discard, so that we don't miss the window to wake the queue if
+ * needed.
+ */
+ smp_rmb();
+ __ptr_ring_discard_one(ring);
+
+ /* If the queue was stopped (meaning the producer couldn't have
+ * inserted new entries just now), and we have actually created
+ * space in the ring, or the ring is now empty (due to a race
+ * with the producer), then it is now safe to wake the queue.
+ */
+ if (unlikely(stopped &&
+ (__ptr_ring_consume_created_space(ring) ||
+ __ptr_ring_empty(ring)))) {
+ /* Paired with smp_rmb() in tun_ring_produce. */
+ smp_wmb();
+ netif_tx_wake_queue(txq);
+ }
+ rcu_read_unlock();
+
+ return ptr;
+}
+
+static void __always_unused *tun_ring_consume(struct tun_file *tfile)
+{
+ void *ptr;
+
+ spin_lock(&tfile->tx_ring.consumer_lock);
+ ptr = __tun_ring_consume(tfile);
+ spin_unlock(&tfile->tx_ring.consumer_lock);
+
+ return ptr;
+}
+
/* Net device start xmit */
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
{
--
2.43.0
On Thu, Nov 20, 2025 at 04:29:08PM +0100, Simon Schippers wrote:
> Implement new ring buffer produce and consume functions for tun and tap
> drivers that provide lockless producer-consumer synchronization and
> netdev queue management to prevent ptr_ring tail drop and permanent
> starvation.
>
> - tun_ring_produce(): Produces packets to the ptr_ring with proper memory
> barriers and proactively stops the netdev queue when the ring is about
> to become full.
>
> - __tun_ring_consume() / __tap_ring_consume(): Internal consume functions
> that check if the netdev queue was stopped due to a full ring, and wake
> it when space becomes available. Uses memory barriers to ensure proper
> ordering between producer and consumer.
>
> - tun_ring_consume() / tap_ring_consume(): Wrapper functions that acquire
> the consumer lock before calling the internal consume functions.
>
> Key features:
> - Proactive queue stopping using __ptr_ring_full_next() to stop the queue
> before it becomes completely full.
> - Not stopping the queue when the ptr_ring is full already, because if
> the consumer empties all entries in the meantime, stopping the queue
> would cause permanent starvation.
what is permanent starvation? this comment seems to answer this
question:
/* Do not stop the netdev queue if the ptr_ring is full already.
* The consumer could empty out the ptr_ring in the meantime
* without noticing the stopped netdev queue, resulting in a
* stopped netdev queue and an empty ptr_ring. In this case the
* netdev queue would stay stopped forever.
*/
why is having a single entry in
the ring that we never use helpful to address this?
In fact, all your patch does to solve it, is check
netif_tx_queue_stopped on every consumed packet.
I already proposed:
static inline int __ptr_ring_peek_producer(struct ptr_ring *r)
{
if (unlikely(!r->size) || r->queue[r->producer])
return -ENOSPC;
return 0;
}
And with that, why isn't avoiding the race as simple as
just rechecking after stopping the queue?
__ptr_ring_produce();
if (__ptr_ring_peek_producer())
netif_tx_stop_queue
if (!__ptr_ring_peek_producer())
netif_tx_wake_queue(txq);
--
MST
On 11/25/25 17:54, Michael S. Tsirkin wrote:
> On Thu, Nov 20, 2025 at 04:29:08PM +0100, Simon Schippers wrote:
>> Implement new ring buffer produce and consume functions for tun and tap
>> drivers that provide lockless producer-consumer synchronization and
>> netdev queue management to prevent ptr_ring tail drop and permanent
>> starvation.
>>
>> - tun_ring_produce(): Produces packets to the ptr_ring with proper memory
>> barriers and proactively stops the netdev queue when the ring is about
>> to become full.
>>
>> - __tun_ring_consume() / __tap_ring_consume(): Internal consume functions
>> that check if the netdev queue was stopped due to a full ring, and wake
>> it when space becomes available. Uses memory barriers to ensure proper
>> ordering between producer and consumer.
>>
>> - tun_ring_consume() / tap_ring_consume(): Wrapper functions that acquire
>> the consumer lock before calling the internal consume functions.
>>
>> Key features:
>> - Proactive queue stopping using __ptr_ring_full_next() to stop the queue
>> before it becomes completely full.
>> - Not stopping the queue when the ptr_ring is full already, because if
>> the consumer empties all entries in the meantime, stopping the queue
>> would cause permanent starvation.
>
> what is permanent starvation? this comment seems to answer this
> question:
>
>
> /* Do not stop the netdev queue if the ptr_ring is full already.
> * The consumer could empty out the ptr_ring in the meantime
> * without noticing the stopped netdev queue, resulting in a
> * stopped netdev queue and an empty ptr_ring. In this case the
> * netdev queue would stay stopped forever.
> */
>
>
> why having a single entry in
> the ring we never use helpful to address this?
>
>
>
>
> In fact, all your patch does to solve it, is check
> netif_tx_queue_stopped on every consumed packet.
>
>
> I already proposed:
>
> static inline int __ptr_ring_peek_producer(struct ptr_ring *r)
> {
> if (unlikely(!r->size) || r->queue[r->producer])
> return -ENOSPC;
> return 0;
> }
>
> And with that, why isn't avoiding the race as simple as
> just rechecking after stopping the queue?
I think you are right and that is quite similar to what veth [1] does.
However, there are two differences:
- Your approach avoids returning NETDEV_TX_BUSY by already stopping
when the ring becomes full (and not when the ring is full already)
- ...and the recheck of the producer wakes on !full instead of empty.
I like both aspects better than the veth implementation.
Just one thing: like the veth implementation, we probably need a
smp_mb__after_atomic() after netif_tx_stop_queue() as they also discussed
in their v6 [2].
On the consumer side, I would then just do:
__ptr_ring_consume();
if (unlikely(__ptr_ring_consume_created_space()))
netif_tx_wake_queue(txq);
Right?
And for the batched consume method, I would just call this in a loop.
Thank you!
[1] Link: https://lore.kernel.org/netdev/174559288731.827981.8748257839971869213.stgit@firesoul/T/#m2582fcc48901e2e845b20b89e0e7196951484e5f
[2] Link: https://lore.kernel.org/all/174549933665.608169.392044991754158047.stgit@firesoul/T/#m63f2deb86ffbd9ff3a27e1232077a3775606c14d
>
> __ptr_ring_produce();
> if (__ptr_ring_peek_producer())
> netif_tx_stop_queue
smp_mb__after_atomic(); // Right here
> if (!__ptr_ring_peek_producer())
> netif_tx_wake_queue(txq);
>
>
>
>
>
>
>
On Wed, Nov 26, 2025 at 10:23:50AM +0100, Simon Schippers wrote:
> On 11/25/25 17:54, Michael S. Tsirkin wrote:
> > On Thu, Nov 20, 2025 at 04:29:08PM +0100, Simon Schippers wrote:
> >> Implement new ring buffer produce and consume functions for tun and tap
> >> drivers that provide lockless producer-consumer synchronization and
> >> netdev queue management to prevent ptr_ring tail drop and permanent
> >> starvation.
> >>
> >> - tun_ring_produce(): Produces packets to the ptr_ring with proper memory
> >> barriers and proactively stops the netdev queue when the ring is about
> >> to become full.
> >>
> >> - __tun_ring_consume() / __tap_ring_consume(): Internal consume functions
> >> that check if the netdev queue was stopped due to a full ring, and wake
> >> it when space becomes available. Uses memory barriers to ensure proper
> >> ordering between producer and consumer.
> >>
> >> - tun_ring_consume() / tap_ring_consume(): Wrapper functions that acquire
> >> the consumer lock before calling the internal consume functions.
> >>
> >> Key features:
> >> - Proactive queue stopping using __ptr_ring_full_next() to stop the queue
> >> before it becomes completely full.
> >> - Not stopping the queue when the ptr_ring is full already, because if
> >> the consumer empties all entries in the meantime, stopping the queue
> >> would cause permanent starvation.
> >
> > what is permanent starvation? this comment seems to answer this
> > question:
> >
> >
> > /* Do not stop the netdev queue if the ptr_ring is full already.
> > * The consumer could empty out the ptr_ring in the meantime
> > * without noticing the stopped netdev queue, resulting in a
> > * stopped netdev queue and an empty ptr_ring. In this case the
> > * netdev queue would stay stopped forever.
> > */
> >
> >
> > why having a single entry in
> > the ring we never use helpful to address this?
> >
> >
> >
> >
> > In fact, all your patch does to solve it, is check
> > netif_tx_queue_stopped on every consumed packet.
> >
> >
> > I already proposed:
> >
> > static inline int __ptr_ring_peek_producer(struct ptr_ring *r)
> > {
> > if (unlikely(!r->size) || r->queue[r->producer])
> > return -ENOSPC;
> > return 0;
> > }
> >
> > And with that, why isn't avoiding the race as simple as
> > just rechecking after stopping the queue?
>
> I think you are right and that is quite similar to what veth [1] does.
> However, there are two differences:
>
> - Your approach avoids returning NETDEV_TX_BUSY by already stopping
> when the ring becomes full (and not when the ring is full already)
> - ...and the recheck of the producer wakes on !full instead of empty.
>
> I like both aspects better than the veth implementation.
Right.
Though frankly, someone should just fix NETDEV_TX_BUSY already
at least with the most popular qdiscs.
It is a common situation and it is just annoying that every driver has
to come up with its own scheme.
> Just one thing: like the veth implementation, we probably need a
> smp_mb__after_atomic() after netif_tx_stop_queue() as they also discussed
> in their v6 [2].
yea makes sense.
>
> On the consumer side, I would then just do:
>
> __ptr_ring_consume();
> if (unlikely(__ptr_ring_consume_created_space()))
> netif_tx_wake_queue(txq);
>
> Right?
>
> And for the batched consume method, I would just call this in a loop.
Well tun does not use batched consume does it?
> Thank you!
>
> [1] Link: https://lore.kernel.org/netdev/174559288731.827981.8748257839971869213.stgit@firesoul/T/#m2582fcc48901e2e845b20b89e0e7196951484e5f
> [2] Link: https://lore.kernel.org/all/174549933665.608169.392044991754158047.stgit@firesoul/T/#m63f2deb86ffbd9ff3a27e1232077a3775606c14d
>
> >
> > __ptr_ring_produce();
> > if (__ptr_ring_peek_producer())
> > netif_tx_stop_queue
>
> smp_mb__after_atomic(); // Right here
>
> > if (!__ptr_ring_peek_producer())
> > netif_tx_wake_queue(txq);
> >
> >
> >
> >
> >
> >
> >
On 11/26/25 16:25, Michael S. Tsirkin wrote:
> On Wed, Nov 26, 2025 at 10:23:50AM +0100, Simon Schippers wrote:
>> On 11/25/25 17:54, Michael S. Tsirkin wrote:
>>> On Thu, Nov 20, 2025 at 04:29:08PM +0100, Simon Schippers wrote:
>>>> Implement new ring buffer produce and consume functions for tun and tap
>>>> drivers that provide lockless producer-consumer synchronization and
>>>> netdev queue management to prevent ptr_ring tail drop and permanent
>>>> starvation.
>>>>
>>>> - tun_ring_produce(): Produces packets to the ptr_ring with proper memory
>>>> barriers and proactively stops the netdev queue when the ring is about
>>>> to become full.
>>>>
>>>> - __tun_ring_consume() / __tap_ring_consume(): Internal consume functions
>>>> that check if the netdev queue was stopped due to a full ring, and wake
>>>> it when space becomes available. Uses memory barriers to ensure proper
>>>> ordering between producer and consumer.
>>>>
>>>> - tun_ring_consume() / tap_ring_consume(): Wrapper functions that acquire
>>>> the consumer lock before calling the internal consume functions.
>>>>
>>>> Key features:
>>>> - Proactive queue stopping using __ptr_ring_full_next() to stop the queue
>>>> before it becomes completely full.
>>>> - Not stopping the queue when the ptr_ring is full already, because if
>>>> the consumer empties all entries in the meantime, stopping the queue
>>>> would cause permanent starvation.
>>>
>>> what is permanent starvation? this comment seems to answer this
>>> question:
>>>
>>>
>>> /* Do not stop the netdev queue if the ptr_ring is full already.
>>> * The consumer could empty out the ptr_ring in the meantime
>>> * without noticing the stopped netdev queue, resulting in a
>>> * stopped netdev queue and an empty ptr_ring. In this case the
>>> * netdev queue would stay stopped forever.
>>> */
>>>
>>>
>>> why having a single entry in
>>> the ring we never use helpful to address this?
>>>
>>>
>>>
>>>
>>> In fact, all your patch does to solve it, is check
>>> netif_tx_queue_stopped on every consumed packet.
>>>
>>>
>>> I already proposed:
>>>
>>> static inline int __ptr_ring_peek_producer(struct ptr_ring *r)
>>> {
>>> if (unlikely(!r->size) || r->queue[r->producer])
>>> return -ENOSPC;
>>> return 0;
>>> }
>>>
>>> And with that, why isn't avoiding the race as simple as
>>> just rechecking after stopping the queue?
>>
>> I think you are right and that is quite similar to what veth [1] does.
>> However, there are two differences:
>>
>> - Your approach avoids returning NETDEV_TX_BUSY by already stopping
>> when the ring becomes full (and not when the ring is full already)
>> - ...and the recheck of the producer wakes on !full instead of empty.
>>
>> I like both aspects better than the veth implementation.
>
> Right.
>
> Though frankly, someone should just fix NETDEV_TX_BUSY already
> at least with the most popular qdiscs.
>
> It is a common situation and it is just annoying that every driver has
> to come up with its own scheme.
I can not judge it, but yes, it would have made this patchset way
simpler.
>
>
>
>
>
>> Just one thing: like the veth implementation, we probably need a
>> smp_mb__after_atomic() after netif_tx_stop_queue() as they also discussed
>> in their v6 [2].
>
> yea makes sense.
>
>>
>> On the consumer side, I would then just do:
>>
>> __ptr_ring_consume();
>> if (unlikely(__ptr_ring_consume_created_space()))
>> netif_tx_wake_queue(txq);
>>
>> Right?
>>
>> And for the batched consume method, I would just call this in a loop.
>
> Well tun does not use batched consume does it?
tun does not but vhost-net does.
Since vhost-net also uses tun_net_xmit() as its ndo_start_xmit in a
tap+vhost-net setup, its consumer must also be changed. Else
tun_net_xmit() would stop the queue, but it would never be woken again.
>
>
>> Thank you!
>>
>> [1] Link: https://lore.kernel.org/netdev/174559288731.827981.8748257839971869213.stgit@firesoul/T/#m2582fcc48901e2e845b20b89e0e7196951484e5f
>> [2] Link: https://lore.kernel.org/all/174549933665.608169.392044991754158047.stgit@firesoul/T/#m63f2deb86ffbd9ff3a27e1232077a3775606c14d
>>
>>>
>>> __ptr_ring_produce();
>>> if (__ptr_ring_peek_producer())
>>> netif_tx_stop_queue
>>
>> smp_mb__after_atomic(); // Right here
>>
>>> if (!__ptr_ring_peek_producer())
>>> netif_tx_wake_queue(txq);
>>>
>>>
>>>
>>>
>>>
>>>
>>>
>
On Wed, Nov 26, 2025 at 05:04:25PM +0100, Simon Schippers wrote:
> On 11/26/25 16:25, Michael S. Tsirkin wrote:
> > On Wed, Nov 26, 2025 at 10:23:50AM +0100, Simon Schippers wrote:
> >> On 11/25/25 17:54, Michael S. Tsirkin wrote:
> >>> On Thu, Nov 20, 2025 at 04:29:08PM +0100, Simon Schippers wrote:
> >>>> Implement new ring buffer produce and consume functions for tun and tap
> >>>> drivers that provide lockless producer-consumer synchronization and
> >>>> netdev queue management to prevent ptr_ring tail drop and permanent
> >>>> starvation.
> >>>>
> >>>> - tun_ring_produce(): Produces packets to the ptr_ring with proper memory
> >>>> barriers and proactively stops the netdev queue when the ring is about
> >>>> to become full.
> >>>>
> >>>> - __tun_ring_consume() / __tap_ring_consume(): Internal consume functions
> >>>> that check if the netdev queue was stopped due to a full ring, and wake
> >>>> it when space becomes available. Uses memory barriers to ensure proper
> >>>> ordering between producer and consumer.
> >>>>
> >>>> - tun_ring_consume() / tap_ring_consume(): Wrapper functions that acquire
> >>>> the consumer lock before calling the internal consume functions.
> >>>>
> >>>> Key features:
> >>>> - Proactive queue stopping using __ptr_ring_full_next() to stop the queue
> >>>> before it becomes completely full.
> >>>> - Not stopping the queue when the ptr_ring is full already, because if
> >>>> the consumer empties all entries in the meantime, stopping the queue
> >>>> would cause permanent starvation.
> >>>
> >>> what is permanent starvation? this comment seems to answer this
> >>> question:
> >>>
> >>>
> >>> /* Do not stop the netdev queue if the ptr_ring is full already.
> >>> * The consumer could empty out the ptr_ring in the meantime
> >>> * without noticing the stopped netdev queue, resulting in a
> >>> * stopped netdev queue and an empty ptr_ring. In this case the
> >>> * netdev queue would stay stopped forever.
> >>> */
> >>>
> >>>
> >>> why having a single entry in
> >>> the ring we never use helpful to address this?
> >>>
> >>>
> >>>
> >>>
> >>> In fact, all your patch does to solve it, is check
> >>> netif_tx_queue_stopped on every consumed packet.
> >>>
> >>>
> >>> I already proposed:
> >>>
> >>> static inline int __ptr_ring_peek_producer(struct ptr_ring *r)
> >>> {
> >>> if (unlikely(!r->size) || r->queue[r->producer])
> >>> return -ENOSPC;
> >>> return 0;
> >>> }
> >>>
> >>> And with that, why isn't avoiding the race as simple as
> >>> just rechecking after stopping the queue?
> >>
> >> I think you are right and that is quite similar to what veth [1] does.
> >> However, there are two differences:
> >>
> >> - Your approach avoids returning NETDEV_TX_BUSY by already stopping
> >> when the ring becomes full (and not when the ring is full already)
> >> - ...and the recheck of the producer wakes on !full instead of empty.
> >>
> >> I like both aspects better than the veth implementation.
> >
> > Right.
> >
> > Though frankly, someone should just fix NETDEV_TX_BUSY already
> > at least with the most popular qdiscs.
> >
> > It is a common situation and it is just annoying that every driver has
> > to come up with its own scheme.
>
> I can not judge it, but yes, it would have made this patchset way
> simpler.
>
> >
> >
> >
> >
> >
> >> Just one thing: like the veth implementation, we probably need a
> >> smp_mb__after_atomic() after netif_tx_stop_queue() as they also discussed
> >> in their v6 [2].
> >
> > yea makes sense.
> >
> >>
> >> On the consumer side, I would then just do:
> >>
> >> __ptr_ring_consume();
> >> if (unlikely(__ptr_ring_consume_created_space()))
> >> netif_tx_wake_queue(txq);
> >>
> >> Right?
> >>
> >> And for the batched consume method, I would just call this in a loop.
> >
> > Well tun does not use batched consume does it?
>
> tun does not but vhost-net does.
>
> Since vhost-net also uses tun_net_xmit() as its ndo_start_xmit in a
> tap+vhost-net setup, its consumer must also be changed. Else
> tun_net_xmit() would stop the queue, but it would never be woken again.
Ah, ok.
> >
> >
> >> Thank you!
> >>
> >> [1] Link: https://lore.kernel.org/netdev/174559288731.827981.8748257839971869213.stgit@firesoul/T/#m2582fcc48901e2e845b20b89e0e7196951484e5f
> >> [2] Link: https://lore.kernel.org/all/174549933665.608169.392044991754158047.stgit@firesoul/T/#m63f2deb86ffbd9ff3a27e1232077a3775606c14d
> >>
> >>>
> >>> __ptr_ring_produce();
> >>> if (__ptr_ring_peek_producer())
> >>> netif_tx_stop_queue
> >>
> >> smp_mb__after_atomic(); // Right here
> >>
> >>> if (!__ptr_ring_peek_producer())
> >>> netif_tx_wake_queue(txq);
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >>>
> >
© 2016 - 2025 Red Hat, Inc.