net/xdp/xsk.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-)
xsk_init() previously registered the PF_XDP socket family before the
per-net subsystem and other prerequisites (netdevice notifier, caches)
were fully initialized.
This exposed .create = xsk_create() to user space while per-netns
state (net->xdp.lock/list) was still uninitialized. A task with
CAP_NET_RAW could trigger this during boot/module load by calling
socket(PF_XDP, SOCK_RAW, 0) concurrently with xsk_init(), leading
to a NULL pointer dereference or use-after-free in the list manipulation.
To fix this, move sock_register() to the end of the initialization
sequence, ensuring that all required kernel structures are ready before
exposing the AF_XDP interface to userspace.
Accordingly, reorder the error unwind path to ensure proper cleanup
in reverse order of initialization. Also, explicitly add
kmem_cache_destroy() in the error path to prevent leaking
xsk_tx_generic_cache if the registration fails.
Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt")
Signed-off-by: Kery Qi <qikeyu2017@gmail.com>
---
net/xdp/xsk.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index f093c3453f64..d402f23dfd8e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
#include <net/netdev_lock.h>
@@ -1922,13 +1923,9 @@ static int __init xsk_init(void)
if (err)
goto out;
- err = sock_register(&xsk_family_ops);
- if (err)
- goto out_proto;
-
err = register_pernet_subsys(&xsk_net_ops);
if (err)
- goto out_sk;
+ goto out_proto;
err = register_netdevice_notifier(&xsk_netdev_notifier);
if (err)
@@ -1939,17 +1936,21 @@ static int __init xsk_init(void)
0, SLAB_HWCACHE_ALIGN, NULL);
if (!xsk_tx_generic_cache) {
err = -ENOMEM;
- goto out_unreg_notif;
+ goto out_notifier;
}
+ err = sock_register(&xsk_family_ops);
+ if (err)
+ goto out_cache;
+
return 0;
-out_unreg_notif:
+out_cache:
+ kmem_cache_destroy(xsk_tx_generic_cache);
+out_notifier:
unregister_netdevice_notifier(&xsk_netdev_notifier);
out_pernet:
unregister_pernet_subsys(&xsk_net_ops);
-out_sk:
- sock_unregister(PF_XDP);
out_proto:
proto_unregister(&xsk_proto);
out:
--
2.34.1
On 01/09, Kery Qi wrote:
> xsk_init() previously registered the PF_XDP socket family before the
> per-net subsystem and other prerequisites (netdevice notifier, caches)
> were fully initialized.
>
> This exposed .create = xsk_create() to user space while per-netns
> state (net->xdp.lock/list) was still uninitialized. A task with
> CAP_NET_RAW could trigger this during boot/module load by calling
> socket(PF_XDP, SOCK_RAW, 0) concurrently with xsk_init(), leading
> to a NULL pointer dereference or use-after-free in the list manipulation.
>
> To fix this, move sock_register() to the end of the initialization
> sequence, ensuring that all required kernel structures are ready before
> exposing the AF_XDP interface to userspace.
>
> Accordingly, reorder the error unwind path to ensure proper cleanup
> in reverse order of initialization. Also, explicitly add
> kmem_cache_destroy() in the error path to prevent leaking
> xsk_tx_generic_cache if the registration fails.

Is it something that you've hit in real life? xsk_init happens so early
during the init process (fs_init) that I don't understand why the order
would matter.
© 2016 - 2026 Red Hat, Inc.