arch/um/drivers/vector_kern.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-)
From: Henry Barreto <me@henrybarreto.dev>
Bringing a UML vector netdev up can panic in vector_net_open() with a
fault in _raw_spin_lock().
vector_net_open() calls vector_reset_stats(), which takes the RX and TX
queue locks. However, queue allocation depends on runtime transport
options. With tap transport, vector RX/TX queues are not created and the
legacy header buffers are used instead. Taking a queue lock then
dereferences a NULL queue pointer.
Take the queue locks in vector_reset_stats() only when the corresponding
queue exists. Also move the RX queue lock in vector_poll() into the
VECTOR_RX path, so legacy RX does not touch rx_queue.
Fixes: 612a8c8e0b43 ("um: vector: Replace locks guarding queue depth with atomics")
Signed-off-by: Henry Barreto <me@henrybarreto.dev>
---
arch/um/drivers/vector_kern.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 25d9258fa592..70762f15d093 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -110,19 +110,26 @@ static void vector_reset_stats(struct vector_private *vp)
* in vector_poll.
*/
- spin_lock(&vp->rx_queue->head_lock);
+ if (vp->rx_queue)
+ spin_lock(&vp->rx_queue->head_lock);
+
vp->estats.rx_queue_max = 0;
vp->estats.rx_queue_running_average = 0;
vp->estats.rx_encaps_errors = 0;
vp->estats.sg_ok = 0;
vp->estats.sg_linearized = 0;
- spin_unlock(&vp->rx_queue->head_lock);
+
+ if (vp->rx_queue)
+ spin_unlock(&vp->rx_queue->head_lock);
+
/* TX stats are modified with TX head_lock held
* in vector_send.
*/
- spin_lock(&vp->tx_queue->head_lock);
+ if (vp->tx_queue)
+ spin_lock(&vp->tx_queue->head_lock);
+
vp->estats.tx_timeout_count = 0;
vp->estats.tx_restart_queue = 0;
vp->estats.tx_kicks = 0;
@@ -130,7 +137,10 @@ static void vector_reset_stats(struct vector_private *vp)
vp->estats.tx_flow_control_xoff = 0;
vp->estats.tx_queue_max = 0;
vp->estats.tx_queue_running_average = 0;
- spin_unlock(&vp->tx_queue->head_lock);
+
+ if (vp->tx_queue)
+ spin_unlock(&vp->tx_queue->head_lock);
+
}
static int get_mtu(struct arglist *def)
@@ -1168,15 +1178,15 @@ static int vector_poll(struct napi_struct *napi, int budget)
if ((vp->options & VECTOR_TX) != 0)
tx_enqueued = (vector_send(vp->tx_queue) > 0);
- spin_lock(&vp->rx_queue->head_lock);
- if ((vp->options & VECTOR_RX) > 0)
+ if ((vp->options & VECTOR_RX) > 0) {
+ spin_lock(&vp->rx_queue->head_lock);
err = vector_mmsg_rx(vp, budget);
- else {
+ spin_unlock(&vp->rx_queue->head_lock);
+ } else {
err = vector_legacy_rx(vp);
if (err > 0)
err = 1;
}
- spin_unlock(&vp->rx_queue->head_lock);
if (err > 0)
work_done += err;
--
2.54.0
On 27/05/2026 22:35, Henry Barreto wrote:
> From: Henry Barreto <me@henrybarreto.dev>
>
> Bringing a UML vector netdev up can panic in vector_net_open() with a
> fault in _raw_spin_lock().
>
> vector_net_open() calls vector_reset_stats(), which takes the RX and TX
> queue locks. However, queue allocation depends on runtime transport
> options. With tap transport, vector RX/TX queues are not created and the
> legacy header buffers are used instead. Taking a queue lock then
> dereferences a NULL queue pointer.
>
> Take the queue locks in vector_reset_stats() only when the corresponding
> queue exists. Also move the RX queue lock in vector_poll() into the
> VECTOR_RX path, so legacy RX does not touch rx_queue.
>
> Fixes: 612a8c8e0b43 ("um: vector: Replace locks guarding queue depth with atomics")
> Signed-off-by: Henry Barreto <me@henrybarreto.dev>
> ---
> arch/um/drivers/vector_kern.c | 26 ++++++++++++++++++--------
> 1 file changed, 18 insertions(+), 8 deletions(-)
>
> diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
> index 25d9258fa592..70762f15d093 100644
> --- a/arch/um/drivers/vector_kern.c
> +++ b/arch/um/drivers/vector_kern.c
> @@ -110,19 +110,26 @@ static void vector_reset_stats(struct vector_private *vp)
> * in vector_poll.
> */
>
> - spin_lock(&vp->rx_queue->head_lock);
> + if (vp->rx_queue)
> + spin_lock(&vp->rx_queue->head_lock);
> +
> vp->estats.rx_queue_max = 0;
> vp->estats.rx_queue_running_average = 0;
> vp->estats.rx_encaps_errors = 0;
> vp->estats.sg_ok = 0;
> vp->estats.sg_linearized = 0;
> - spin_unlock(&vp->rx_queue->head_lock);
> +
> + if (vp->rx_queue)
> + spin_unlock(&vp->rx_queue->head_lock);
> +
>
> /* TX stats are modified with TX head_lock held
> * in vector_send.
> */
>
> - spin_lock(&vp->tx_queue->head_lock);
> + if (vp->tx_queue)
> + spin_lock(&vp->tx_queue->head_lock);
> +
> vp->estats.tx_timeout_count = 0;
> vp->estats.tx_restart_queue = 0;
> vp->estats.tx_kicks = 0;
> @@ -130,7 +137,10 @@ static void vector_reset_stats(struct vector_private *vp)
> vp->estats.tx_flow_control_xoff = 0;
> vp->estats.tx_queue_max = 0;
> vp->estats.tx_queue_running_average = 0;
> - spin_unlock(&vp->tx_queue->head_lock);
> +
> + if (vp->tx_queue)
> + spin_unlock(&vp->tx_queue->head_lock);
> +
> }
>
> static int get_mtu(struct arglist *def)
> @@ -1168,15 +1178,15 @@ static int vector_poll(struct napi_struct *napi, int budget)
>
> if ((vp->options & VECTOR_TX) != 0)
> tx_enqueued = (vector_send(vp->tx_queue) > 0);
> - spin_lock(&vp->rx_queue->head_lock);
> - if ((vp->options & VECTOR_RX) > 0)
> + if ((vp->options & VECTOR_RX) > 0) {
> + spin_lock(&vp->rx_queue->head_lock);
> err = vector_mmsg_rx(vp, budget);
> - else {
> + spin_unlock(&vp->rx_queue->head_lock);
> + } else {
> err = vector_legacy_rx(vp);
> if (err > 0)
> err = 1;
> }
> - spin_unlock(&vp->rx_queue->head_lock);
> if (err > 0)
> work_done += err;
>
Acked-By: Anton Ivanov <anton.ivanov@cambridgegreys.com>
--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/
On Wed, 2026-05-27 at 18:35 -0300, Henry Barreto wrote:
> From: Henry Barreto <me@henrybarreto.dev>
>
> Bringing a UML vector netdev up can panic in vector_net_open() with a
> fault in _raw_spin_lock().
>
> vector_net_open() calls vector_reset_stats(), which takes the RX and TX
> queue locks. However, queue allocation depends on runtime transport
> options. With tap transport, vector RX/TX queues are not created and the
> legacy header buffers are used instead. Taking a queue lock then
> dereferences a NULL queue pointer.
>
> Take the queue locks in vector_reset_stats() only when the corresponding
> queue exists. Also move the RX queue lock in vector_poll() into the
> VECTOR_RX path, so legacy RX does not touch rx_queue.
>
> Fixes: 612a8c8e0b43 ("um: vector: Replace locks guarding queue depth with atomics")
So ... you're effectively saying that the tap transport has been broken
since 6.12, released ~1.5 years ago.
Maybe we should just remove that entirely since nobody complained?
johannes
On 28/05/2026 08:13, Johannes Berg wrote:
> On Wed, 2026-05-27 at 18:35 -0300, Henry Barreto wrote:
>> From: Henry Barreto <me@henrybarreto.dev>
>>
>> Bringing a UML vector netdev up can panic in vector_net_open() with a
>> fault in _raw_spin_lock().
>>
>> vector_net_open() calls vector_reset_stats(), which takes the RX and TX
>> queue locks. However, queue allocation depends on runtime transport
>> options. With tap transport, vector RX/TX queues are not created and the
>> legacy header buffers are used instead. Taking a queue lock then
>> dereferences a NULL queue pointer.
>>
>> Take the queue locks in vector_reset_stats() only when the corresponding
>> queue exists. Also move the RX queue lock in vector_poll() into the
>> VECTOR_RX path, so legacy RX does not touch rx_queue.
>>
>> Fixes: 612a8c8e0b43 ("um: vector: Replace locks guarding queue depth with atomics")
>
> So ... you're effectively saying that the tap transport has been broken
> since 6.12, released ~1.5 years ago.
>
> Maybe we should just remove that entirely since nobody complained?
>
> johannes
>
>
More interesting while it was not observed in testing.
The patch is OK otherwise. I will ack it.
--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/
On 28/05/2026 08:13, Johannes Berg wrote:
> On Wed, 2026-05-27 at 18:35 -0300, Henry Barreto wrote:
>> From: Henry Barreto <me@henrybarreto.dev>
>>
>> Bringing a UML vector netdev up can panic in vector_net_open() with a
>> fault in _raw_spin_lock().
>>
>> vector_net_open() calls vector_reset_stats(), which takes the RX and TX
>> queue locks. However, queue allocation depends on runtime transport
>> options. With tap transport, vector RX/TX queues are not created and the
>> legacy header buffers are used instead. Taking a queue lock then
>> dereferences a NULL queue pointer.
>>
>> Take the queue locks in vector_reset_stats() only when the corresponding
>> queue exists. Also move the RX queue lock in vector_poll() into the
>> VECTOR_RX path, so legacy RX does not touch rx_queue.
>>
>> Fixes: 612a8c8e0b43 ("um: vector: Replace locks guarding queue depth with atomics")
>
> So ... you're effectively saying that the tap transport has been broken
> since 6.12, released ~1.5 years ago.
The one I use is raw :) It is the most useful.
Tap badly needs someone to expose the socket which the kernel uses to implement it. I started writing a kernel helper at some point, but never finished it. It should be able to produce "stupid speeds" with it.
And, yes indeed, this was broken by 612a8c8e0b43ba7e3d0e51f6f76a5fec4912d439 / 2024-07-05 which by the look of it I did not test for tap.
Nice catch.
>
> Maybe we should just remove that entirely since nobody complained?
>
> johannes
>
--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/
On Thu, 2026-05-28 at 09:00 +0100, Anton Ivanov wrote: > > > > So ... you're effectively saying that the tap transport has been broken > > since 6.12, released ~1.5 years ago. > > The one I use is raw :) It is the most useful. Right. > Tap badly needs someone to expose the socket which the kernel uses to implement it. I started writing a kernel helper at some point, but never finished it. It should be able to produce "stupid speeds" with it. > > And, yes indeed, this was broken by 612a8c8e0b43ba7e3d0e51f6f76a5fec4912d439 / 2024-07-05 which by the look of it I did not test for tap. > Yeah, I guess I'm just thinking that clearly nobody uses it, the infrastructure isn't there (as you say), and it was broken for a rather long time now and nobody complained (that we know of.) So ... is it worth even keeping it? I'm fine either way, just wondering. johannes
On 28/05/2026 09:17, Johannes Berg wrote: > On Thu, 2026-05-28 at 09:00 +0100, Anton Ivanov wrote: >>> So ... you're effectively saying that the tap transport has been broken >>> since 6.12, released ~1.5 years ago. >> The one I use is raw :) It is the most useful. > Right. > >> Tap badly needs someone to expose the socket which the kernel uses to implement it. I started writing a kernel helper at some point, but never finished it. It should be able to produce "stupid speeds" with it. >> >> And, yes indeed, this was broken by 612a8c8e0b43ba7e3d0e51f6f76a5fec4912d439 / 2024-07-05 which by the look of it I did not test for tap. >> > Yeah, I guess I'm just thinking that clearly nobody uses it, the > infrastructure isn't there (as you say), and it was broken for a rather > long time now and nobody complained (that we know of.) That part of the code also gets hit if you turn off vector RX or TX from the command line, so the patch is needed anyway. > > So ... is it worth even keeping it? I'm fine either way, just wondering. > > johannes > > -- Anton R. Ivanov Cambridgegreys Limited. Registered in England. Company Number 10273661 https://www.cambridgegreys.com/
© 2016 - 2026 Red Hat, Inc.