include/linux/local_lock.h | 7 +++++++ 1 file changed, 7 insertions(+)
On !PREEMPT_RT and !LOCKDEP kernels, local_[un]lock_nested_bh()
are supposed to be NOPs.
This is not exactly true after 7ff495e26a39 ("local_lock: Move
this_cpu_ptr() notation from internal to main header") due to
this_cpu_ptr() being evaluated even if its result is not used.
This prevents some tail call optimizations.
After this patch we have gains in networking fast paths:
$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 0/36 up/down: 0/-644 (-644)
Function old new delta
tcp_sigpool_end 79 71 -8
skb_attempt_defer_free 457 449 -8
ppp_xmit_process 179 171 -8
ppp_write 411 403 -8
ppp_output_wakeup 135 127 -8
napi_skb_cache_get_bulk 440 432 -8
napi_consume_skb 409 401 -8
dst_cache_set_ip6 203 195 -8
dst_cache_set_ip4 135 127 -8
cpu_map_enqueue 193 185 -8
bq_enqueue 263 255 -8
__netdev_alloc_skb 377 369 -8
__netdev_alloc_frag_align 155 147 -8
__napi_kfree_skb 136 128 -8
napi_skb_free_stolen_head 199 190 -9
input_action_end_bpf 1083 1072 -11
napi_alloc_skb 275 263 -12
__napi_alloc_frag_align 59 45 -14
xdp_build_skb_from_zc 590 574 -16
tcp_v4_send_ack 1129 1113 -16
sch_frag_xmit_hook 1260 1244 -16
flush_backlog 507 491 -16
dst_cache_get_ip6 99 83 -16
dst_cache_get_ip4 90 74 -16
do_xdp_generic 932 916 -16
__napi_build_skb 591 575 -16
__dev_flush 115 99 -16
__cpu_map_flush 85 69 -16
dst_cache_get 55 38 -17
tcp_v4_send_reset 2682 2658 -24
mptcp_subflow_delegate 955 931 -24
__alloc_skb 988 964 -24
mptcp_napi_poll 310 281 -29
nat_keepalive_work_single 1385 1335 -50
gro_cells_receive 320 244 -76
process_backlog 486 404 -82
Total: Before=25812320, After=25811676, chg -0.00%
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Marco Elver <elver@google.com>
---
include/linux/local_lock.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
--- a/include/linux/local_lock.h
+++ b/include/linux/local_lock.h
@@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
local_unlock_irqrestore(_T->lock, _T->flags),
unsigned long flags)
+#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
+ defined(CONFIG_DEBUG_LOCK_ALLOC)
#define local_lock_nested_bh(_lock) \
__local_lock_nested_bh(__this_cpu_local_lock(_lock))
#define local_unlock_nested_bh(_lock) \
__local_unlock_nested_bh(__this_cpu_local_lock(_lock))
+#else
+static inline void local_lock_nested_bh(local_lock_t *_lock) {}
+static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
+#endif
+
DEFINE_LOCK_GUARD_1(local_lock_nested_bh, local_lock_t __percpu,
local_lock_nested_bh(_T->lock),
local_unlock_nested_bh(_T->lock))
base-commit: 1f318b96cc84d7c2ab792fcc0bfd42a7ca890681
prerequisite-patch-id: f6002c357582927a383603a22e69bc0d7a5b9528
--
2.53.0.473.g4a7958ca14-goog
On Mon, Mar 09, 2026 at 12:20:55PM +0000, Eric Dumazet wrote:
> diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
> --- a/include/linux/local_lock.h
> +++ b/include/linux/local_lock.h
> @@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
> local_unlock_irqrestore(_T->lock, _T->flags),
> unsigned long flags)
>
> +#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
> + defined(CONFIG_DEBUG_LOCK_ALLOC)
> #define local_lock_nested_bh(_lock) \
> __local_lock_nested_bh(__this_cpu_local_lock(_lock))
>
> #define local_unlock_nested_bh(_lock) \
> __local_unlock_nested_bh(__this_cpu_local_lock(_lock))
>
> +#else
> +static inline void local_lock_nested_bh(local_lock_t *_lock) {}
> +static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
> +#endif
This isn't going to work; WARN_CONTEXT_ANALYSIS is unconditional on
clang >= 22.1
How come that this isn't DCEd properly?
On Mon, Mar 9, 2026 at 2:44 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Mar 09, 2026 at 12:20:55PM +0000, Eric Dumazet wrote:
>
> > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> > index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
> > --- a/include/linux/local_lock.h
> > +++ b/include/linux/local_lock.h
> > @@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
> > local_unlock_irqrestore(_T->lock, _T->flags),
> > unsigned long flags)
> >
> > +#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
> > + defined(CONFIG_DEBUG_LOCK_ALLOC)
> > #define local_lock_nested_bh(_lock) \
> > __local_lock_nested_bh(__this_cpu_local_lock(_lock))
> >
> > #define local_unlock_nested_bh(_lock) \
> > __local_unlock_nested_bh(__this_cpu_local_lock(_lock))
> >
> > +#else
> > +static inline void local_lock_nested_bh(local_lock_t *_lock) {}
> > +static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
> > +#endif
>
> This isn't going to work; WARN_CONTEXT_ANALYSIS is unconditional on
> clang >= 22.1
>
> How come that this isn't DCEd properly?
It might be partially done.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0e217041958a83d2a3c18de2965808442546c49b..50455951dc38668b0cbbcccdb2c5ce726e3c4da9
100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -7498,3 +7498,12 @@ struct vlan_type_depth
__vlan_get_protocol_offset(const struct sk_buff *skb,
};
}
EXPORT_SYMBOL(__vlan_get_protocol_offset);
+
+void ericeric(void);
+void ericeric(void)
+{
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
+}
objdump --disassemble=ericeric -r net/core/skbuff.o
net/core/skbuff.o: file format elf64-x86-64
Disassembly of section .text:
000000000000fe40 <ericeric>:
fe40: f3 0f 1e fa endbr64
fe44: e8 00 00 00 00 call fe49 <ericeric+0x9>
fe45: R_X86_64_PLT32 __fentry__-0x4
fe49: 65 48 8b 05 00 00 00 mov %gs:0x0(%rip),%rax # fe51
<ericeric+0x11>
fe50: 00
fe4d: R_X86_64_PC32 this_cpu_off-0x4
fe51: 2e e9 00 00 00 00 cs jmp fe57 <ericeric+0x17>
fe53: R_X86_64_PLT32 __x86_return_thunk-0x4
Disassembly of section .init.text:
On Mon, Mar 9, 2026 at 3:03 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Mon, Mar 9, 2026 at 2:44 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Mon, Mar 09, 2026 at 12:20:55PM +0000, Eric Dumazet wrote:
> >
> > > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> > > index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
> > > --- a/include/linux/local_lock.h
> > > +++ b/include/linux/local_lock.h
> > > @@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
> > > local_unlock_irqrestore(_T->lock, _T->flags),
> > > unsigned long flags)
> > >
> > > +#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
> > > + defined(CONFIG_DEBUG_LOCK_ALLOC)
> > > #define local_lock_nested_bh(_lock) \
> > > __local_lock_nested_bh(__this_cpu_local_lock(_lock))
> > >
> > > #define local_unlock_nested_bh(_lock) \
> > > __local_unlock_nested_bh(__this_cpu_local_lock(_lock))
> > >
> > > +#else
> > > +static inline void local_lock_nested_bh(local_lock_t *_lock) {}
> > > +static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
> > > +#endif
> >
> > This isn't going to work; WARN_CONTEXT_ANALYSIS is unconditional on
> > clang >= 22.1
> >
> > How come that this isn't DCEd properly?
>
> It might be partially done.
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 0e217041958a83d2a3c18de2965808442546c49b..50455951dc38668b0cbbcccdb2c5ce726e3c4da9
> 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -7498,3 +7498,12 @@ struct vlan_type_depth
> __vlan_get_protocol_offset(const struct sk_buff *skb,
> };
> }
> EXPORT_SYMBOL(__vlan_get_protocol_offset);
> +
> +void ericeric(void);
> +void ericeric(void)
> +{
> + local_lock_nested_bh(&napi_alloc_cache.bh_lock);
> + local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
> + local_lock_nested_bh(&napi_alloc_cache.bh_lock);
> + local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
> +}
>
> objdump --disassemble=ericeric -r net/core/skbuff.o
>
> net/core/skbuff.o: file format elf64-x86-64
>
>
> Disassembly of section .text:
>
> 000000000000fe40 <ericeric>:
> fe40: f3 0f 1e fa endbr64
> fe44: e8 00 00 00 00 call fe49 <ericeric+0x9>
> fe45: R_X86_64_PLT32 __fentry__-0x4
> fe49: 65 48 8b 05 00 00 00 mov %gs:0x0(%rip),%rax # fe51
> <ericeric+0x11>
> fe50: 00
> fe4d: R_X86_64_PC32 this_cpu_off-0x4
> fe51: 2e e9 00 00 00 00 cs jmp fe57 <ericeric+0x17>
> fe53: R_X86_64_PLT32 __x86_return_thunk-0x4
>
> Disassembly of section .init.text:
Same for
+
+void ericeric(void);
+void ericeric(void)
+{
+ raw_cpu_read_long(this_cpu_off);
+ raw_cpu_read_long(this_cpu_off);
+}
I am guessing __raw_cpu_read() is forcing the asm ?
On Mon, Mar 9, 2026 at 3:18 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Mon, Mar 9, 2026 at 3:03 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Mon, Mar 9, 2026 at 2:44 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Mon, Mar 09, 2026 at 12:20:55PM +0000, Eric Dumazet wrote:
> > >
> > > > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> > > > index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
> > > > --- a/include/linux/local_lock.h
> > > > +++ b/include/linux/local_lock.h
> > > > @@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
> > > > local_unlock_irqrestore(_T->lock, _T->flags),
> > > > unsigned long flags)
> > > >
> > > > +#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
> > > > + defined(CONFIG_DEBUG_LOCK_ALLOC)
> > > > #define local_lock_nested_bh(_lock) \
> > > > __local_lock_nested_bh(__this_cpu_local_lock(_lock))
> > > >
> > > > #define local_unlock_nested_bh(_lock) \
> > > > __local_unlock_nested_bh(__this_cpu_local_lock(_lock))
> > > >
> > > > +#else
> > > > +static inline void local_lock_nested_bh(local_lock_t *_lock) {}
> > > > +static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
> > > > +#endif
> > >
> > > This isn't going to work; WARN_CONTEXT_ANALYSIS is unconditional on
> > > clang >= 22.1
> > >
> > > How come that this isn't DCEd properly?
> >
> > It might be partially done.
> >
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index 0e217041958a83d2a3c18de2965808442546c49b..50455951dc38668b0cbbcccdb2c5ce726e3c4da9
> > 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -7498,3 +7498,12 @@ struct vlan_type_depth
> > __vlan_get_protocol_offset(const struct sk_buff *skb,
> > };
> > }
> > EXPORT_SYMBOL(__vlan_get_protocol_offset);
> > +
> > +void ericeric(void);
> > +void ericeric(void)
> > +{
> > + local_lock_nested_bh(&napi_alloc_cache.bh_lock);
> > + local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
> > + local_lock_nested_bh(&napi_alloc_cache.bh_lock);
> > + local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
> > +}
> >
> > objdump --disassemble=ericeric -r net/core/skbuff.o
> >
> > net/core/skbuff.o: file format elf64-x86-64
> >
> >
> > Disassembly of section .text:
> >
> > 000000000000fe40 <ericeric>:
> > fe40: f3 0f 1e fa endbr64
> > fe44: e8 00 00 00 00 call fe49 <ericeric+0x9>
> > fe45: R_X86_64_PLT32 __fentry__-0x4
> > fe49: 65 48 8b 05 00 00 00 mov %gs:0x0(%rip),%rax # fe51
> > <ericeric+0x11>
> > fe50: 00
> > fe4d: R_X86_64_PC32 this_cpu_off-0x4
> > fe51: 2e e9 00 00 00 00 cs jmp fe57 <ericeric+0x17>
> > fe53: R_X86_64_PLT32 __x86_return_thunk-0x4
> >
> > Disassembly of section .init.text:
>
> Same for
>
> +
> +void ericeric(void);
> +void ericeric(void)
> +{
> + raw_cpu_read_long(this_cpu_off);
> + raw_cpu_read_long(this_cpu_off);
> +}
>
> I am guessing __raw_cpu_read() is forcing the asm ?
Might be a clang issue. Oh well.
clang --version
Debian clang version 19.1.7 (10.1+build1)
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/lib/llvm-19/bin
Documentation/process/changes.rst mentions the minimum supported version is 15.0
On 2026-03-09 15:52:34 [+0100], Eric Dumazet wrote:
> > +void ericeric(void);
> > +void ericeric(void)
> > +{
> > + raw_cpu_read_long(this_cpu_off);
> > + raw_cpu_read_long(this_cpu_off);
> > +}
> >
> > I am guessing __raw_cpu_read() is forcing the asm ?
>
> Might be a clang issue. Oh well.
So the difference is that with gcc we have USE_X86_SEG_SUPPORT and with
llvm we don't. This leads to two asm statements with LLVM of which only
one is eliminated. This optimisation originates in commit ca4256348660c
("x86/percpu: Use C for percpu read/write accessors").
__seg_fs and __seg_gs are supported by LLVM but enabling them leads to tons of
warnings and aborts later.
Is there something missing in LLVM? The generated code for
raw_cpu_read_long(this_cpu_off) looks fine.
Sebastian
On Wed, Mar 11, 2026 at 4:55 PM Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
>
> On 2026-03-09 15:52:34 [+0100], Eric Dumazet wrote:
> > > +void ericeric(void);
> > > +void ericeric(void)
> > > +{
> > > + raw_cpu_read_long(this_cpu_off);
> > > + raw_cpu_read_long(this_cpu_off);
> > > +}
> > >
> > > I am guessing __raw_cpu_read() is forcing the asm ?
> >
> > Might be a clang issue. Oh well.
>
> So the difference is that with gcc we have USE_X86_SEG_SUPPORT and with
> llvm we don't. This leads to two asm statements with LLVM of which only
> one is eliminated. This optimisation origins in commit ca4256348660c
> ("x86/percpu: Use C for percpu read/write accessors").
>
> __seg_fs and __seg_gs is supported by LLVM but enabling it leads to tons
> warnings and aborts later.
Tons of warnings is just due to clang being picky and warning about
duplicated qualifiers, such as "__seg_gs __seg_gs var". This can be
fixed with:
https://lore.kernel.org/lkml/20240526175655.227798-1-ubizjak@gmail.com/
> Is there something missing in LLVM? The generated code for
> raw_cpu_read_long(this_cpu_off) looks fine.
Yes:
1. The %fs: and %gs: prefix does not get emitted in inline assembly.
2. An internal compiler error when addressing symbols directly:
https://github.com/llvm/llvm-project/issues/93449
3. Wrong named address space for anonymous struct:
https://github.com/llvm/llvm-project/issues/119705
Uros.
On Wed, Mar 11, 2026 at 5:32 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Wed, Mar 11, 2026 at 4:55 PM Sebastian Andrzej Siewior
> <bigeasy@linutronix.de> wrote:
> >
> > On 2026-03-09 15:52:34 [+0100], Eric Dumazet wrote:
> > > > +void ericeric(void);
> > > > +void ericeric(void)
> > > > +{
> > > > + raw_cpu_read_long(this_cpu_off);
> > > > + raw_cpu_read_long(this_cpu_off);
> > > > +}
> > > >
> > > > I am guessing __raw_cpu_read() is forcing the asm ?
> > >
> > > Might be a clang issue. Oh well.
> >
> > So the difference is that with gcc we have USE_X86_SEG_SUPPORT and with
> > llvm we don't. This leads to two asm statements with LLVM of which only
> > one is eliminated. This optimisation origins in commit ca4256348660c
> > ("x86/percpu: Use C for percpu read/write accessors").
> >
> > __seg_fs and __seg_gs is supported by LLVM but enabling it leads to tons
> > warnings and aborts later.
>
> Tons of warnings is just due to clang being picky and warns for
> duplicated qualifiers, such as "__seg_gs __seg_gs var". This can be
> fixed with:
>
> https://lore.kernel.org/lkml/20240526175655.227798-1-ubizjak@gmail.com/
>
> > Is there something missing in LLVM? The generated code for
> > raw_cpu_read_long(this_cpu_off) looks fine.
>
> Yes:
>
> 1. The %fs: and %gs: prefix does not get emitted in inline assembly.
>
> 2. An internal compiler error when addressing symbols directly:
> https://github.com/llvm/llvm-project/issues/93449
>
> 3. Wrong named address space for anonymous struct:
> https://github.com/llvm/llvm-project/issues/119705
BTW: A related issue is that ASAN fails to handle gs: prefixed
addresses. For GCC, we have had to disable ASAN instrumentation for
all locations in non-default address spaces. With asm accessors, the
access is hidden from ASAN, and the memory access is not instrumented
anyways.
Uros.
On Mon, Mar 9, 2026 at 2:44 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Mar 09, 2026 at 12:20:55PM +0000, Eric Dumazet wrote:
>
> > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> > index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
> > --- a/include/linux/local_lock.h
> > +++ b/include/linux/local_lock.h
> > @@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
> > local_unlock_irqrestore(_T->lock, _T->flags),
> > unsigned long flags)
> >
> > +#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
> > + defined(CONFIG_DEBUG_LOCK_ALLOC)
> > #define local_lock_nested_bh(_lock) \
> > __local_lock_nested_bh(__this_cpu_local_lock(_lock))
> >
> > #define local_unlock_nested_bh(_lock) \
> > __local_unlock_nested_bh(__this_cpu_local_lock(_lock))
> >
> > +#else
> > +static inline void local_lock_nested_bh(local_lock_t *_lock) {}
> > +static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
> > +#endif
>
> This isn't going to work; WARN_CONTEXT_ANALYSIS is unconditional on
> clang >= 22.1
>
> How come that this isn't DCEd properly?
BTW I wonder if the following WARN_CONTEXT_ANALYSIS should be
CONFIG_WARN_CONTEXT_ANALYSIS
include/linux/local_lock_internal.h:318:#if defined(WARN_CONTEXT_ANALYSIS)
include/linux/local_lock_internal.h:337:#else /* WARN_CONTEXT_ANALYSIS */
include/linux/local_lock_internal.h:339:#endif /* WARN_CONTEXT_ANALYSIS */
On Mon, 9 Mar 2026 at 14:49, Eric Dumazet <edumazet@google.com> wrote:
>
> On Mon, Mar 9, 2026 at 2:44 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Mon, Mar 09, 2026 at 12:20:55PM +0000, Eric Dumazet wrote:
> >
> > > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> > > index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
> > > --- a/include/linux/local_lock.h
> > > +++ b/include/linux/local_lock.h
> > > @@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
> > > local_unlock_irqrestore(_T->lock, _T->flags),
> > > unsigned long flags)
> > >
> > > +#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
> > > + defined(CONFIG_DEBUG_LOCK_ALLOC)
> > > #define local_lock_nested_bh(_lock) \
> > > __local_lock_nested_bh(__this_cpu_local_lock(_lock))
> > >
> > > #define local_unlock_nested_bh(_lock) \
> > > __local_unlock_nested_bh(__this_cpu_local_lock(_lock))
> > >
> > > +#else
> > > +static inline void local_lock_nested_bh(local_lock_t *_lock) {}
> > > +static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
> > > +#endif
> >
> > This isn't going to work; WARN_CONTEXT_ANALYSIS is unconditional on
> > clang >= 22.1
> >
> > How come that this isn't DCEd properly?
>
> BTW I wonder if the following WARN_CONTEXT_ANALYSIS should be
> CONFIG_WARN_CONTEXT_ANALYSIS
>
> include/linux/local_lock_internal.h:318:#if defined(WARN_CONTEXT_ANALYSIS)
> include/linux/local_lock_internal.h:337:#else /* WARN_CONTEXT_ANALYSIS */
> include/linux/local_lock_internal.h:339:#endif /* WARN_CONTEXT_ANALYSIS */
Even if enabled in Kconfig, our make rules set -DWARN_CONTEXT_ANALYSIS
for translation units where we actually want to compile with
-Wthread-safety. So WARN_CONTEXT_ANALYSIS should be ok.
But for !CONFIG_PREEMPT_RT and !CONFIG_DEBUG_LOCK_ALLOC builds, where
we build with context analysis (which is purely static, no dynamic
overhead) we should be able to get the same better codegen as well.
On Mon, Mar 9, 2026 at 3:06 PM Marco Elver <elver@google.com> wrote:
>
> On Mon, 9 Mar 2026 at 14:49, Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Mon, Mar 9, 2026 at 2:44 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Mon, Mar 09, 2026 at 12:20:55PM +0000, Eric Dumazet wrote:
> > >
> > > > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> > > > index b8830148a8591c17c22e36470fbc13ff5c354955..40c2da54a0b720265be7b6327e0922a49befd8fc 100644
> > > > --- a/include/linux/local_lock.h
> > > > +++ b/include/linux/local_lock.h
> > > > @@ -94,12 +94,19 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
> > > > local_unlock_irqrestore(_T->lock, _T->flags),
> > > > unsigned long flags)
> > > >
> > > > +#if defined(WARN_CONTEXT_ANALYSIS) || defined(CONFIG_PREEMPT_RT) || \
> > > > + defined(CONFIG_DEBUG_LOCK_ALLOC)
> > > > #define local_lock_nested_bh(_lock) \
> > > > __local_lock_nested_bh(__this_cpu_local_lock(_lock))
> > > >
> > > > #define local_unlock_nested_bh(_lock) \
> > > > __local_unlock_nested_bh(__this_cpu_local_lock(_lock))
> > > >
> > > > +#else
> > > > +static inline void local_lock_nested_bh(local_lock_t *_lock) {}
> > > > +static inline void local_unlock_nested_bh(local_lock_t *__lock) {}
> > > > +#endif
> > >
> > > This isn't going to work; WARN_CONTEXT_ANALYSIS is unconditional on
> > > clang >= 22.1
> > >
> > > How come that this isn't DCEd properly?
> >
> > BTW I wonder if the following WARN_CONTEXT_ANALYSIS should be
> > CONFIG_WARN_CONTEXT_ANALYSIS
> >
> > include/linux/local_lock_internal.h:318:#if defined(WARN_CONTEXT_ANALYSIS)
> > include/linux/local_lock_internal.h:337:#else /* WARN_CONTEXT_ANALYSIS */
> > include/linux/local_lock_internal.h:339:#endif /* WARN_CONTEXT_ANALYSIS */
>
> Even if enabled in Kconfig, our make rules set -DWARN_CONTEXT_ANALYSIS
> for translation units where we actually want to compile with
> -Wthread-safety. So WARN_CONTEXT_ANALYSIS should be ok.
>
> But for !CONFIG_PREEMPT_RT and !CONFIG_DEBUG_LOCK_ALLOC builds, where
> we build with context analysis (which is purely static, no dynamic
> overhead) we should be able to get the same better codegen as well.
Ah ok, a bit confusing ....
© 2016 - 2026 Red Hat, Inc.