drivers/net/tun.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
During the introduction of struct bpf_net_context handling for
XDP-redirect, the tun driver has been missed.
Set the bpf_net_context before invoking BPF XDP program within the TUN
driver.
Reported-by: syzbot+0b5c75599f1d872bea6f@syzkaller.appspotmail.com
Reported-by: syzbot+5ae46b237278e2369cac@syzkaller.appspotmail.com
Reported-by: syzbot+c1e04a422bbc0f0f2921@syzkaller.appspotmail.com
Fixes: 401cb7dae8130 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/net/tun.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9254bca2813dc..df4dd6b7479e1 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1661,6 +1661,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
int len, int *skb_xdp)
{
struct page_frag *alloc_frag = ¤t->task_frag;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog *xdp_prog;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
char *buf;
@@ -1700,6 +1701,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
struct xdp_buff xdp;
@@ -1728,12 +1730,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
pad = xdp.data - xdp.data_hard_start;
len = xdp.data_end - xdp.data;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
out:
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return NULL;
@@ -1914,20 +1918,24 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
skb_record_rx_queue(skb, tfile->queue_index);
if (skb_xdp) {
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog *xdp_prog;
int ret;
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
ret = do_xdp_generic(xdp_prog, &skb);
if (ret != XDP_PASS) {
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
goto unlock_frags;
}
}
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
}
@@ -2566,6 +2574,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
ctl && ctl->type == TUN_MSG_PTR) {
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct tun_page tpage;
int n = ctl->num;
int flush = 0, queued = 0;
@@ -2574,6 +2583,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
for (i = 0; i < n; i++) {
xdp = &((struct xdp_buff *)ctl->ptr)[i];
@@ -2588,6 +2598,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (tfile->napi_enabled && queued > 0)
napi_schedule(&tfile->napi);
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
--
2.45.2
On Wed, 3 Jul 2024 14:27:58 +0200 Sebastian Andrzej Siewior wrote: > During the introduction of struct bpf_net_context handling for > XDP-redirect, the tun driver has been missed. > > Set the bpf_net_context before invoking BPF XDP program within the TUN > driver. Sorry if I'm missing the point but I think this is insufficient. You've covered the NAPI-like entry point to the Rx stack in your initial work, but there's also netif_receive_skb() which drivers may call outside of NAPI, simply disabling BH before the call. The only concern in that case is that we end up in do_xdp_generic(), and there's no bpf_net_set_ctx() anywhere on the way. So my intuition would be to add the bpf_net_set_ctx() inside the if(xdp_prog) in do_xdp_generic(). "XDP generic" has less stringent (more TC-like) perf requirements, so setting context once per packet should be fine. And it will also cover drivers like TUN which use both netif_receive_skb() and call do_xdp_generic(), in a single place.
Hello Sebastian, Jakub,
On Wed, Jul 03, 2024 at 12:01:43PM -0700, Jakub Kicinski wrote:
> On Wed, 3 Jul 2024 14:27:58 +0200 Sebastian Andrzej Siewior wrote:
> > During the introduction of struct bpf_net_context handling for
> > XDP-redirect, the tun driver has been missed.
> >
> > Set the bpf_net_context before invoking BPF XDP program within the TUN
> > driver.
>
> Sorry if I'm missing the point but I think this is insufficient.
> You've covered the NAPI-like entry point to the Rx stack in your
> initial work, but there's also netif_receive_skb() which drivers
> may call outside of NAPI, simply disabling BH before the call.
I've seen some crashes in 6.11-rc7 that seems related to 401cb7dae8130
("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.").
Basically bpf_net_context is NULL, and it is being dereferenced by
bpf_net_ctx->ri.kern_flags (offset 0x38) in the following code.
static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
{
struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
That said, it means that bpf_net_ctx_get() is returning NULL.
This stack is coming from the bpf function bpf_redirect()
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
Since I don't think there is XDP involved, I wondering if we need some
preotection before calling bpf_redirect()
There is the full stack, against bc83b4d1f0869 ("Merge tag
'bcachefs-2024-09-09' of git://evilpiepirate.org/bcachefs")
[ 138.278753] BUG: kernel NULL pointer dereference, address: 0000000000000038
[ 138.292684] #PF: supervisor read access in kernel mode
[ 138.302954] #PF: error_code(0x0000) - not-present page
[ 138.313224] PGD 8fc4e6067 P4D 8fc4e6067 PUD 8fc4e5067 PMD 0
[ 138.324539] Oops: Oops: 0000 [#1] SMP
[ 138.357085] Tainted: [S]=CPU_OUT_OF_SPEC, [E]=UNSIGNED_MODULE
[ 138.368574] Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A23 12/08/2020
[ 138.385971] RIP: 0010:bpf_redirect (./include/linux/filter.h:788 net/core/filter.c:2531 net/core/filter.c:2529)
[ 138.394509] Code: e9 79 ff ff ff 0f 0b cc cc cc cc cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 65 48 8b 04 25 00 2f 03 00 48 8b 80 20 0c 00 00 <8b> 48 38 f6 c1 02 75 2c c7 40 20 00 00 00 00 48 c7 40 18 00 00 00
All code
========
0: e9 79 ff ff ff jmp 0xffffffffffffff7e
5: 0f 0b ud2
7: cc int3
8: cc int3
9: cc int3
a: cc int3
b: cc int3
c: cc int3
d: cc int3
e: cc int3
f: cc int3
10: cc int3
11: cc int3
12: cc int3
13: cc int3
14: cc int3
15: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
1a: 65 48 8b 04 25 00 2f mov %gs:0x32f00,%rax
21: 03 00
23: 48 8b 80 20 0c 00 00 mov 0xc20(%rax),%rax
2a:* 8b 48 38 mov 0x38(%rax),%ecx <-- trapping instruction
2d: f6 c1 02 test $0x2,%cl
30: 75 2c jne 0x5e
32: c7 40 20 00 00 00 00 movl $0x0,0x20(%rax)
39: 48 rex.W
3a: c7 .byte 0xc7
3b: 40 18 00 rex sbb %al,(%rax)
...
Code starting with the faulting instruction
===========================================
0: 8b 48 38 mov 0x38(%rax),%ecx
3: f6 c1 02 test $0x2,%cl
6: 75 2c jne 0x34
8: c7 40 20 00 00 00 00 movl $0x0,0x20(%rax)
f: 48 rex.W
10: c7 .byte 0xc7
11: 40 18 00 rex sbb %al,(%rax)
...
[ 138.432073] RSP: 0018:ffffc9000f0e33d8 EFLAGS: 00010246
[ 138.442523] RAX: 0000000000000000 RBX: ffff888288d4dae0 RCX: ffff888290f6dde2
[ 138.456801] RDX: 00000000000000a8 RSI: 0000000000000000 RDI: 0000000000000002
[ 138.471080] RBP: ffffc9000f0e3450 R08: 0000000000000000 R09: 0000000000000000
[ 138.485354] R10: 0000000000000000 R11: 0000000000000005 R12: ffff88829776aa68
[ 138.499624] R13: 0000000000000002 R14: 0000000000000000 R15: 0000000000000002
[ 138.513894] FS: 00007f0a67000640(0000) GS:ffff88903f880000(0000) knlGS:0000000000000000
[ 138.530076] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 138.541562] CR2: 0000000000000038 CR3: 00000008fc4e8005 CR4: 00000000007706f0
[ 138.555830] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 138.570097] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 138.584364] PKRU: 55555554
[ 138.589769] Call Trace:
[ 138.594656] <TASK>
[ 138.598850] ? __die_body (arch/x86/kernel/dumpstack.c:421)
[ 138.605826] ? page_fault_oops (arch/x86/mm/fault.c:711)
[ 138.614017] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
[ 138.621859] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
[ 138.630227] ? bpf_redirect (./include/linux/filter.h:788 net/core/filter.c:2531 net/core/filter.c:2529)
[ 138.637547] bpf_prog_61d4b6831e57702d_tw_ns_nk2phy+0x31c/0x327
[ 138.649385] ? bpf_selem_link_map (./kernel/bpf/bpf_local_storage.c:402)
[ 138.657748] netkit_xmit (./include/linux/bpf.h:1243 ./include/linux/filter.h:691 ./include/linux/filter.h:698 drivers/net/netkit.c:46 drivers/net/netkit.c:86)
[ 138.664898] dev_hard_start_xmit (./include/linux/netdevice.h:4913 ./include/linux/netdevice.h:4922 net/core/dev.c:3580 net/core/dev.c:3596)
[ 138.673263] __dev_queue_xmit (net/core/dev.h:168 net/core/dev.c:4424)
[ 138.681279] ? __dev_queue_xmit (./include/linux/bottom_half.h:? ./include/linux/rcupdate.h:890 net/core/dev.c:4348)
[ 138.689470] ip6_finish_output2 (./include/net/neighbour.h:? net/ipv6/ip6_output.c:141)
[ 138.697833] ip6_finish_output (net/ipv6/ip6_output.c:? net/ipv6/ip6_output.c:226)
[ 138.706021] ip6_output (./include/linux/netfilter.h:303 net/ipv6/ip6_output.c:247)
[ 138.712643] ? __rmqueue_pcplist (mm/page_alloc.c:2976)
[ 138.721350] ip6_xmit (net/ipv6/ip6_output.c:380)
[ 138.727976] ? refill_obj_stock.llvm.9389014391162377460 (mm/memcontrol.c:2912)
[ 138.740509] ? security_sk_classify_flow (security/security.c:?)
[ 138.750088] ? __sk_dst_check (net/core/sock.c:599)
[ 138.757756] inet6_csk_xmit (net/ipv6/inet6_connection_sock.c:135)
[ 138.765080] __tcp_transmit_skb (net/ipv4/tcp_output.c:1466)
[ 138.773445] ? _copy_from_iter (./arch/x86/include/asm/uaccess_64.h:110 ./arch/x86/include/asm/uaccess_64.h:118 ./arch/x86/include/asm/uaccess_64.h:125 lib/iov_iter.c:55 ./include/linux/iov_iter.h:51 ./include/linux/iov_iter.h:247 ./include/linux/iov_iter.h:271 lib/iov_iter.c:249 lib/iov_iter.c:260)
[ 138.781460] tcp_connect (net/ipv4/tcp_output.c:4032 net/ipv4/tcp_output.c:4142)
[ 138.788605] ? bpf_trampoline_6442578911+0x59/0xa3
[ 138.798183] tcp_v6_connect (net/ipv6/tcp_ipv6.c:333)
[ 138.805854] __inet_stream_connect (net/ipv4/af_inet.c:680)
[ 138.814565] ? __kmalloc_cache_noprof (./arch/x86/include/asm/jump_label.h:55 ./include/linux/memcontrol.h:1694 mm/slub.c:2158 mm/slub.c:4002 mm/slub.c:4041 mm/slub.c:4188)
[ 138.823795] tcp_sendmsg_fastopen (net/ipv4/tcp.c:1035)
[ 138.832507] tcp_sendmsg_locked (net/ipv4/tcp.c:1087)
[ 138.840870] ? lock_sock_nested (net/core/sock.c:3551)
[ 138.848883] ? __bpf_prog_exit_recur (./kernel/bpf/trampoline.c:909)
[ 138.857765] tcp_sendmsg (net/ipv4/tcp.c:1354)
[ 138.864562] ____sys_sendmsg.llvm.5426677171080474013 (net/socket.c:733 net/socket.c:745 net/socket.c:2597)
[ 138.876749] ? __import_iovec (./include/linux/err.h:61 lib/iov_iter.c:1282)
[ 138.884590] ___sys_sendmsg (net/socket.c:2651)
[ 138.892084] ? do_pte_missing (mm/memory.c:5019 mm/memory.c:5052 mm/memory.c:5191 mm/memory.c:3947)
[ 138.900274] ? __perf_sw_event (kernel/events/internal.h:228 kernel/events/core.c:10002 kernel/events/core.c:10027)
[ 138.908115] ? handle_mm_fault (mm/memory.c:? mm/memory.c:5858)
[ 138.916477] __x64_sys_sendmsg (net/socket.c:2680 net/socket.c:2689 net/socket.c:2687 net/socket.c:2687)
[ 138.924317] do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83)
[ 138.931638] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539)
[ 138.939477] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[ 138.949575] RIP: 0033:0x7f0b1e1293eb
[ 138.956732] Code: 48 89 e5 48 83 ec 20 89 55 ec 48 89 75 f0 89 7d f8 e8 99 a6 f6 ff 41 89 c0 8b 55 ec 48 8b 75 f0 8b 7d f8 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 48 89 45 f8 e8 d1 a6 f6 ff 48 8b
All code
========
0: 48 89 e5 mov %rsp,%rbp
3: 48 83 ec 20 sub $0x20,%rsp
7: 89 55 ec mov %edx,-0x14(%rbp)
a: 48 89 75 f0 mov %rsi,-0x10(%rbp)
e: 89 7d f8 mov %edi,-0x8(%rbp)
11: e8 99 a6 f6 ff call 0xfffffffffff6a6af
16: 41 89 c0 mov %eax,%r8d
19: 8b 55 ec mov -0x14(%rbp),%edx
1c: 48 8b 75 f0 mov -0x10(%rbp),%rsi
20: 8b 7d f8 mov -0x8(%rbp),%edi
23: b8 2e 00 00 00 mov $0x2e,%eax
28: 0f 05 syscall
2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction
30: 77 35 ja 0x67
32: 44 89 c7 mov %r8d,%edi
35: 48 89 45 f8 mov %rax,-0x8(%rbp)
39: e8 d1 a6 f6 ff call 0xfffffffffff6a70f
3e: 48 rex.W
3f: 8b .byte 0x8b
Code starting with the faulting instruction
===========================================
0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
6: 77 35 ja 0x3d
8: 44 89 c7 mov %r8d,%edi
b: 48 89 45 f8 mov %rax,-0x8(%rbp)
f: e8 d1 a6 f6 ff call 0xfffffffffff6a6e5
14: 48 rex.W
15: 8b .byte 0x8b
[ 138.994291] RSP: 002b:00007f0a66ffc220 EFLAGS: 00000293 ORIG_RAX: 000000000000002e
[ 139.009429] RAX: ffffffffffffffda RBX: 00007f0a66ffc548 RCX: 00007f0b1e1293eb
[ 139.023697] RDX: 0000000020004040 RSI: 00007f0a66ffc370 RDI: 0000000000000172
[ 139.037965] RBP: 00007f0a66ffc240 R08: 0000000000000000 R09: 00007f0a66411228
[ 139.052234] R10: 00007f0a66ffc678 R11: 0000000000000293 R12: 00007f0a66ffc4c0
[ 139.066504] R13: 000000000000001c R14: 00007f0a66443000 R15: 0000000000000021
[ 139.080776] </TASK>
[ 139.085138] Modules linked in: sunrpc(E) bpf_preload(E) sch_fq(E) squashfs(E) tls(E) tcp_diag(E) inet_diag(E) act_gact(E) cls_bpf(E) intel_uncore_frequency(E) intel_uncore_frequency_common(E) skx_edac(E) skx_edac_common(E) nfit(E) libnvdimm(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) iTCO_wdt(E) iTCO_vendor_support(E) evdev(E) xhci_pci(E) i2c_i801(E) kvm(E) acpi_cpufreq(E) i2c_smbus(E) xhci_hcd(E) wmi(E) ipmi_si(E) ipmi_devintf(E) ipmi_msghandler(E) button(E) sch_fq_codel(E) vhost_net(E) tun(E) vhost(E) vhost_iotlb(E) tap(E) mpls_gso(E) mpls_iptunnel(E) mpls_router(E) fou(E) loop(E) drm(E) backlight(E) drm_panel_orientation_quirks(E) autofs4(E) efivarfs(E)
On 2024-09-12 05:06:36 [-0700], Breno Leitao wrote:
> Hello Sebastian, Jakub,
Hi,
> I've seen some crashes in 6.11-rc7 that seems related to 401cb7dae8130
> ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.").
>
> Basically bpf_net_context is NULL, and it is being dereferenced by
> bpf_net_ctx->ri.kern_flags (offset 0x38) in the following code.
>
> static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
> {
> struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
> if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
>
> That said, it means that bpf_net_ctx_get() is returning NULL.
>
> This stack is coming from the bpf function bpf_redirect()
> BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
> {
> struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
>
>
> Since I don't think there is XDP involved, I wondering if we need some
> preotection before calling bpf_redirect()
This origins in netkit_xmit(). If my memory serves me, then Daniel told
me that netkit is not doing any redirect and therefore does not need
"this". This must have been during one of the first "designs"/ versions.
If you are saying, that this is possible then something must be done.
Either assign a context or reject the bpf program.
Sebastian
Sebastian Andrzej Siewior <bigeasy@linutronix.de> writes:
> On 2024-09-12 05:06:36 [-0700], Breno Leitao wrote:
>> Hello Sebastian, Jakub,
> Hi,
>
>> I've seen some crashes in 6.11-rc7 that seems related to 401cb7dae8130
>> ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.").
>>
>> Basically bpf_net_context is NULL, and it is being dereferenced by
>> bpf_net_ctx->ri.kern_flags (offset 0x38) in the following code.
>>
>> static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
>> {
>> struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
>> if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
>>
>> That said, it means that bpf_net_ctx_get() is returning NULL.
>>
>> This stack is coming from the bpf function bpf_redirect()
>> BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
>> {
>> struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
>>
>>
>> Since I don't think there is XDP involved, I wondering if we need some
>> preotection before calling bpf_redirect()
>
> This origins in netkit_xmit(). If my memory serves me, then Daniel told
> me that netkit is not doing any redirect and therefore does not need
> "this". This must have been during one of the first "designs"/ versions.
>
> If you are saying, that this is possible then something must be done.
> Either assign a context or reject the bpf program.
Netkit definitely redirects, so it should assign a context object in
netkit_xmit()...
-Toke
Hello Sabastian,
Thanks for the quick reply!
On Thu, Sep 12, 2024 at 02:28:47PM +0200, Sebastian Andrzej Siewior wrote:
> On 2024-09-12 05:06:36 [-0700], Breno Leitao wrote:
> > Hello Sebastian, Jakub,
> Hi,
>
> > I've seen some crashes in 6.11-rc7 that seems related to 401cb7dae8130
> > ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.").
> >
> > Basically bpf_net_context is NULL, and it is being dereferenced by
> > bpf_net_ctx->ri.kern_flags (offset 0x38) in the following code.
> >
> > static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
> > {
> > struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
> > if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
> >
> > That said, it means that bpf_net_ctx_get() is returning NULL.
> >
> > This stack is coming from the bpf function bpf_redirect()
> > BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
> > {
> > struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
> >
> >
> > Since I don't think there is XDP involved, I wondering if we need some
> > preotection before calling bpf_redirect()
>
> This origins in netkit_xmit(). If my memory serves me, then Daniel told
> me that netkit is not doing any redirect and therefore does not need
> "this". This must have been during one of the first "designs"/ versions.
Right, I've seen several crashes related to this, and in all of them it
is through netkit_xmit() -> netkit_run() -> bpf_prog_run()
> If you are saying, that this is possible then something must be done.
> Either assign a context or reject the bpf program.
If we want to assign a context, do you meant something like the
following?
Author: Breno Leitao <leitao@debian.org>
Date: Thu Sep 12 06:11:28 2024 -0700
netkit: Assign missing bpf_net_context.
During the introduction of struct bpf_net_context handling for
XDP-redirect, the netkit driver has been missed.
Set the bpf_net_context before invoking netkit_xmit() program within the
netkit driver.
Fixes: 401cb7dae8130 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.")
Signed-off-by: Breno Leitao <leitao@debian.org>
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 79232f5cc088..f8af57b7a1e8 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -65,6 +65,7 @@ static struct netkit *netkit_priv(const struct net_device *dev)
static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct netkit *nk = netkit_priv(dev);
enum netkit_action ret = READ_ONCE(nk->policy);
netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
@@ -72,6 +73,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_device *peer;
int len = skb->len;
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
rcu_read_lock();
peer = rcu_dereference(nk->peer);
if (unlikely(!peer || !(peer->flags & IFF_UP) ||
@@ -110,6 +112,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
break;
}
rcu_read_unlock();
+ bpf_net_ctx_clear(bpf_net_ctx);
return ret_dev;
}
On 9/12/24 3:17 PM, Breno Leitao wrote:
> On Thu, Sep 12, 2024 at 02:28:47PM +0200, Sebastian Andrzej Siewior wrote:
>> On 2024-09-12 05:06:36 [-0700], Breno Leitao wrote:
>>> Hello Sebastian, Jakub,
>> Hi,
>>
>>> I've seen some crashes in 6.11-rc7 that seems related to 401cb7dae8130
>>> ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.").
>>>
>>> Basically bpf_net_context is NULL, and it is being dereferenced by
>>> bpf_net_ctx->ri.kern_flags (offset 0x38) in the following code.
>>>
>>> static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
>>> {
>>> struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
>>> if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
>>>
>>> That said, it means that bpf_net_ctx_get() is returning NULL.
>>>
>>> This stack is coming from the bpf function bpf_redirect()
>>> BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
>>> {
>>> struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
>>>
>>>
>>> Since I don't think there is XDP involved, I wondering if we need some
>>> preotection before calling bpf_redirect()
>>
>> This origins in netkit_xmit(). If my memory serves me, then Daniel told
>> me that netkit is not doing any redirect and therefore does not need
>> "this". This must have been during one of the first "designs"/ versions.
>
> Right, I've seen several crashes related to this, and in all of them it
> is through netkit_xmit() -> netkit_run() -> bpf_prog_run()
>
>> If you are saying, that this is possible then something must be done.
>> Either assign a context or reject the bpf program.
>
> If we want to assign a context, do you meant something like the
> following?
>
> Author: Breno Leitao <leitao@debian.org>
> Date: Thu Sep 12 06:11:28 2024 -0700
>
> netkit: Assign missing bpf_net_context.
>
> During the introduction of struct bpf_net_context handling for
> XDP-redirect, the netkit driver has been missed.
>
> Set the bpf_net_context before invoking netkit_xmit() program within the
> netkit driver.
>
> Fixes: 401cb7dae8130 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.")
> Signed-off-by: Breno Leitao <leitao@debian.org>
Oh well, quite annoying that we need this context now everywhere also outside of XDP :(
Sebastian, do you see any way where this could be noop for !PREEMPT_RT?
Anyway, fix looks good to me, thanks!
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
> index 79232f5cc088..f8af57b7a1e8 100644
> --- a/drivers/net/netkit.c
> +++ b/drivers/net/netkit.c
> @@ -65,6 +65,7 @@ static struct netkit *netkit_priv(const struct net_device *dev)
>
> static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
> {
> + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
> struct netkit *nk = netkit_priv(dev);
> enum netkit_action ret = READ_ONCE(nk->policy);
> netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
> @@ -72,6 +73,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
> struct net_device *peer;
> int len = skb->len;
>
> + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
> rcu_read_lock();
> peer = rcu_dereference(nk->peer);
> if (unlikely(!peer || !(peer->flags & IFF_UP) ||
> @@ -110,6 +112,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
> break;
> }
> rcu_read_unlock();
> + bpf_net_ctx_clear(bpf_net_ctx);
> return ret_dev;
> }
>
On 2024-09-12 17:03:15 [+0200], Daniel Borkmann wrote: > > Oh well, quite annoying that we need this context now everywhere also outside of XDP :( > Sebastian, do you see any way where this could be noop for !PREEMPT_RT? This isn't related to XDP but to the redirect part of BPF which is (or was) using per-CPU variables. I don't know how much pain it causes here for you and how much of this is actually helping and not making anything worse: - If netkit::active is likely to be NULL you could limit assigning the context only if it != NULL - If you can ensure (via verifier) that netkit_run() won't access the redirect helper (such as bpf_redirect()) and won't return NETKIT_REDIRECT (as a consequence) then the assignment could be avoided in this case. Sebastian
On 12/09/2024 14:17, Breno Leitao wrote:
> Hello Sabastian,
>
> Thanks for the quick reply!
>
> On Thu, Sep 12, 2024 at 02:28:47PM +0200, Sebastian Andrzej Siewior wrote:
>> On 2024-09-12 05:06:36 [-0700], Breno Leitao wrote:
>>> Hello Sebastian, Jakub,
>> Hi,
>>
>>> I've seen some crashes in 6.11-rc7 that seems related to 401cb7dae8130
>>> ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.").
>>>
>>> Basically bpf_net_context is NULL, and it is being dereferenced by
>>> bpf_net_ctx->ri.kern_flags (offset 0x38) in the following code.
>>>
>>> static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
>>> {
>>> struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
>>> if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
>>>
>>> That said, it means that bpf_net_ctx_get() is returning NULL.
>>>
>>> This stack is coming from the bpf function bpf_redirect()
>>> BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
>>> {
>>> struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
>>>
>>>
>>> Since I don't think there is XDP involved, I wondering if we need some
>>> preotection before calling bpf_redirect()
>>
>> This origins in netkit_xmit(). If my memory serves me, then Daniel told
>> me that netkit is not doing any redirect and therefore does not need
>> "this". This must have been during one of the first "designs"/ versions.
>
> Right, I've seen several crashes related to this, and in all of them it
> is through netkit_xmit() -> netkit_run() -> bpf_prog_run()
>
>> If you are saying, that this is possible then something must be done.
>> Either assign a context or reject the bpf program.
>
> If we want to assign a context, do you meant something like the
> following?
>
> Author: Breno Leitao <leitao@debian.org>
> Date: Thu Sep 12 06:11:28 2024 -0700
>
> netkit: Assign missing bpf_net_context.
>
> During the introduction of struct bpf_net_context handling for
> XDP-redirect, the netkit driver has been missed.
>
> Set the bpf_net_context before invoking netkit_xmit() program within the
> netkit driver.
>
> Fixes: 401cb7dae8130 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.")
> Signed-off-by: Breno Leitao <leitao@debian.org>
>
> diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
> index 79232f5cc088..f8af57b7a1e8 100644
> --- a/drivers/net/netkit.c
> +++ b/drivers/net/netkit.c
> @@ -65,6 +65,7 @@ static struct netkit *netkit_priv(const struct net_device *dev)
>
> static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
> {
> + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
> struct netkit *nk = netkit_priv(dev);
> enum netkit_action ret = READ_ONCE(nk->policy);
> netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
> @@ -72,6 +73,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
> struct net_device *peer;
> int len = skb->len;
>
> + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
> rcu_read_lock();
Hi Breno,
looks like bpf_net_ctx should be set under rcu read lock...
> peer = rcu_dereference(nk->peer);
> if (unlikely(!peer || !(peer->flags & IFF_UP) ||
> @@ -110,6 +112,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
> break;
> }
> rcu_read_unlock();
> + bpf_net_ctx_clear(bpf_net_ctx);
> return ret_dev;
> }
Hello Vadim, On Thu, Sep 12, 2024 at 02:32:55PM +0100, Vadim Fedorenko wrote: > On 12/09/2024 14:17, Breno Leitao wrote: > > @@ -72,6 +73,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) > > struct net_device *peer; > > int len = skb->len; > > + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); > > rcu_read_lock(); > > Hi Breno, > > looks like bpf_net_ctx should be set under rcu read lock... Why exactly? I saw in some examples where bpf_net_ctx_set() was set inside the rcu_read_lock(), but, I was not able to come up with justification to do the same. Would you mind elaborating why this might be needed inside the lock? Thanks for the review, --breno
On 2024-09-12 07:19:54 [-0700], Breno Leitao wrote: > Hello Vadim, > > On Thu, Sep 12, 2024 at 02:32:55PM +0100, Vadim Fedorenko wrote: > > On 12/09/2024 14:17, Breno Leitao wrote: > > > @@ -72,6 +73,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) > > > struct net_device *peer; > > > int len = skb->len; > > > + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); > > > rcu_read_lock(); > > > > Hi Breno, > > > > looks like bpf_net_ctx should be set under rcu read lock... > > Why exactly? > > I saw in some examples where bpf_net_ctx_set() was set inside the > rcu_read_lock(), but, I was not able to come up with justification to do > the same. Would you mind elaborating why this might be needed inside the > lock? It might have been done due to simpler nesting or other reasons but there is no requirement to do this under RCU protection. The assignment and cleanup is always performed task-local. > Thanks for the review, > --breno Sebastian
On Thu, Sep 12, 2024 at 04:30:29PM +0200, Sebastian Andrzej Siewior wrote: > On 2024-09-12 07:19:54 [-0700], Breno Leitao wrote: > > Hello Vadim, > > > > On Thu, Sep 12, 2024 at 02:32:55PM +0100, Vadim Fedorenko wrote: > > > On 12/09/2024 14:17, Breno Leitao wrote: > > > > @@ -72,6 +73,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) > > > > struct net_device *peer; > > > > int len = skb->len; > > > > + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); > > > > rcu_read_lock(); > > > > > > Hi Breno, > > > > > > looks like bpf_net_ctx should be set under rcu read lock... > > > > Why exactly? > > > > I saw in some examples where bpf_net_ctx_set() was set inside the > > rcu_read_lock(), but, I was not able to come up with justification to do > > the same. Would you mind elaborating why this might be needed inside the > > lock? > > It might have been done due to simpler nesting or other reasons but > there is no requirement to do this under RCU protection. The assignment > and cleanup is always performed task-local. Thanks. I will keep it out of the RCU lock then, as in the patch above. --breno
On 2024-09-12 06:17:55 [-0700], Breno Leitao wrote: > Hello Sabastian, Hi Breno, > > This origins in netkit_xmit(). If my memory serves me, then Daniel told > > me that netkit is not doing any redirect and therefore does not need > > "this". This must have been during one of the first "designs"/ versions. > > Right, I've seen several crashes related to this, and in all of them it > is through netkit_xmit() -> netkit_run() -> bpf_prog_run() Looking at this again, NETKIT_REDIRECT invokes skb_do_redirect() which is accessing the per-CPU variables/ context from the very first day. So I must have misunderstood something. > > If you are saying, that this is possible then something must be done. > > Either assign a context or reject the bpf program. > > If we want to assign a context, do you meant something like the > following? Yes, correct. And I think that we do want the context here. Feel free to add Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if you post. Sebastian
On 2024-07-03 12:01:43 [-0700], Jakub Kicinski wrote: > On Wed, 3 Jul 2024 14:27:58 +0200 Sebastian Andrzej Siewior wrote: > > During the introduction of struct bpf_net_context handling for > > XDP-redirect, the tun driver has been missed. > > > > Set the bpf_net_context before invoking BPF XDP program within the TUN > > driver. > > Sorry if I'm missing the point but I think this is insufficient. > You've covered the NAPI-like entry point to the Rx stack in your > initial work, but there's also netif_receive_skb() which drivers > may call outside of NAPI, simply disabling BH before the call. Ah okay. I've been looking at a few callers and they ended up in NAPI but if you say and I also noticed the one in TUN… > The only concern in that case is that we end up in do_xdp_generic(), > and there's no bpf_net_set_ctx() anywhere on the way. So my intuition > would be to add the bpf_net_set_ctx() inside the if(xdp_prog) in > do_xdp_generic(). "XDP generic" has less stringent (more TC-like) > perf requirements, so setting context once per packet should be fine. > And it will also cover drivers like TUN which use both > netif_receive_skb() and call do_xdp_generic(), in a single place. Yeah, sounds good. I would remove the wrapper in tun_get_user() and then add one to do_xdp_generic(). Sebastian
During the introduction of struct bpf_net_context handling for
XDP-redirect, the tun driver has been missed.
Jakub also pointed out that there is another call chain to
do_xdp_generic() originating from netif_receive_skb() and drivers may
use it outside from the NAPI context.
Set the bpf_net_context before invoking BPF XDP program within the TUN
driver. Set the bpf_net_context also in do_xdp_generic() if a xdp
program is available.
Reported-by: syzbot+0b5c75599f1d872bea6f@syzkaller.appspotmail.com
Reported-by: syzbot+5ae46b237278e2369cac@syzkaller.appspotmail.com
Reported-by: syzbot+c1e04a422bbc0f0f2921@syzkaller.appspotmail.com
Fixes: 401cb7dae8130 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
v1…v2:
- Add the wrapper to do_xdp_generic().
- Remove the wrapper from tun_get_user() where it was used for a
single do_xdp_generic() invocation.
drivers/net/tun.c | 7 +++++++
net/core/dev.c | 5 +++++
2 files changed, 12 insertions(+)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9254bca2813dc..9b24861464bc6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1661,6 +1661,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
int len, int *skb_xdp)
{
struct page_frag *alloc_frag = ¤t->task_frag;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog *xdp_prog;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
char *buf;
@@ -1700,6 +1701,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
struct xdp_buff xdp;
@@ -1728,12 +1730,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
pad = xdp.data - xdp.data_hard_start;
len = xdp.data_end - xdp.data;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
out:
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return NULL;
@@ -2566,6 +2570,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
ctl && ctl->type == TUN_MSG_PTR) {
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct tun_page tpage;
int n = ctl->num;
int flush = 0, queued = 0;
@@ -2574,6 +2579,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
for (i = 0; i < n; i++) {
xdp = &((struct xdp_buff *)ctl->ptr)[i];
@@ -2588,6 +2594,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (tfile->napi_enabled && queued > 0)
napi_schedule(&tfile->napi);
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
diff --git a/net/core/dev.c b/net/core/dev.c
index 385c4091aa775..73e5af6943c39 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5126,11 +5126,14 @@ static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
@@ -5144,11 +5147,13 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
generic_xdp_tx(*pskb, xdp_prog);
break;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
return XDP_DROP;
}
}
return XDP_PASS;
out_redir:
+ bpf_net_ctx_clear(bpf_net_ctx);
kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
--
2.45.2
On Thu, 4 Jul 2024 12:14:52 +0200 Sebastian Andrzej Siewior wrote: > Subject: [PATCH v2 net-net] tun: Assign missing bpf_net_context. LG, but can I bother you for a repost? the subject tag is typo'ed (net-net vs net-next) and our CI put this on top of net, since it applies. But it doesn't build on top of net. Feel free to repost without any wait.
During the introduction of struct bpf_net_context handling for
XDP-redirect, the tun driver has been missed.
Jakub also pointed out that there is another call chain to
do_xdp_generic() originating from netif_receive_skb() and drivers may
use it outside from the NAPI context.
Set the bpf_net_context before invoking BPF XDP program within the TUN
driver. Set the bpf_net_context also in do_xdp_generic() if a xdp
program is available.
Reported-by: syzbot+0b5c75599f1d872bea6f@syzkaller.appspotmail.com
Reported-by: syzbot+5ae46b237278e2369cac@syzkaller.appspotmail.com
Reported-by: syzbot+c1e04a422bbc0f0f2921@syzkaller.appspotmail.com
Fixes: 401cb7dae8130 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
On 2024-07-04 07:24:33 [-0700], Jakub Kicinski wrote:
> LG, but can I bother you for a repost? the subject tag is typo'ed
> (net-net vs net-next) and our CI put this on top of net, since it
> applies. But it doesn't build on top of net.
> Feel free to repost without any wait.
I am sorry for the trouble.
v2…v3:
- Repost due to typo in subject.
v1…v2:
- Add the wrapper to do_xdp_generic().
- Remove the wrapper from tun_get_user() where it was used for a
single do_xdp_generic() invocation.
drivers/net/tun.c | 7 +++++++
net/core/dev.c | 5 +++++
2 files changed, 12 insertions(+)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9254bca2813dc..9b24861464bc6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1661,6 +1661,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
int len, int *skb_xdp)
{
struct page_frag *alloc_frag = ¤t->task_frag;
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct bpf_prog *xdp_prog;
int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
char *buf;
@@ -1700,6 +1701,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
xdp_prog = rcu_dereference(tun->xdp_prog);
if (xdp_prog) {
struct xdp_buff xdp;
@@ -1728,12 +1730,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
pad = xdp.data - xdp.data_hard_start;
len = xdp.data_end - xdp.data;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
out:
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
return NULL;
@@ -2566,6 +2570,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
ctl && ctl->type == TUN_MSG_PTR) {
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
struct tun_page tpage;
int n = ctl->num;
int flush = 0, queued = 0;
@@ -2574,6 +2579,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
local_bh_disable();
rcu_read_lock();
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
for (i = 0; i < n; i++) {
xdp = &((struct xdp_buff *)ctl->ptr)[i];
@@ -2588,6 +2594,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (tfile->napi_enabled && queued > 0)
napi_schedule(&tfile->napi);
+ bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
local_bh_enable();
diff --git a/net/core/dev.c b/net/core/dev.c
index 385c4091aa775..73e5af6943c39 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5126,11 +5126,14 @@ static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
{
+ struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+
if (xdp_prog) {
struct xdp_buff xdp;
u32 act;
int err;
+ bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog);
if (act != XDP_PASS) {
switch (act) {
@@ -5144,11 +5147,13 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb)
generic_xdp_tx(*pskb, xdp_prog);
break;
}
+ bpf_net_ctx_clear(bpf_net_ctx);
return XDP_DROP;
}
}
return XDP_PASS;
out_redir:
+ bpf_net_ctx_clear(bpf_net_ctx);
kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP);
return XDP_DROP;
}
--
2.45.2
© 2016 - 2026 Red Hat, Inc.