Add the try_cmpxchg() form to the per-cpu ops.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/asm-generic/percpu.h | 113 +++++++++++++++++++++++++++++++++++++++++--
include/linux/percpu-defs.h | 19 +++++++
2 files changed, 128 insertions(+), 4 deletions(-)
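
Unlike cmpxchg(), which returns the value it found and leaves the caller
to compare and reload by hand, try_cmpxchg() takes a pointer to the
expected value, returns a bool, and on failure writes the freshly
observed value back through that pointer. A rough sketch of the
caller-side difference follows; the per-cpu variable and both helper
functions are hypothetical, not part of this patch:

/* Hypothetical per-cpu event counter, for illustration only. */
static DEFINE_PER_CPU(unsigned long, my_stat);

/* cmpxchg() form: compare the return value and reload manually. */
static void inc_stat_cmpxchg(void)
{
	unsigned long old = this_cpu_read(my_stat), cur;

	for (;;) {
		cur = this_cpu_cmpxchg(my_stat, old, old + 1);
		if (cur == old)
			break;
		old = cur;		/* manual reload on failure */
	}
}

/* try_cmpxchg() form: a failed attempt has already updated 'old'. */
static void inc_stat_try_cmpxchg(void)
{
	unsigned long old = this_cpu_read(my_stat);

	while (!this_cpu_try_cmpxchg(my_stat, &old, old + 1))
		;		/* 'old' now holds the observed value */
}

The second form is what this patch adds for the per-cpu ops: the failure
path needs no separate re-read, and the retry loop collapses to one call.
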
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -89,16 +89,37 @@ do { \
__ret; \
})
-#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
+#define __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, _cmpxchg) \
+({ \
+ typeof(pcp) __val, __old = *(ovalp); \
+ __val = _cmpxchg(pcp, __old, nval); \
+ if (__val != __old) \
+ *(ovalp) = __val; \
+ __val == __old; \
+})
+
+#define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) \
({ \
typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \
- typeof(pcp) __ret; \
- __ret = *__p; \
- if (__ret == (oval)) \
+ typeof(pcp) __val = *__p, __old = *(ovalp); \
+ bool __ret; \
+ if (__val == __old) { \
*__p = nval; \
+ __ret = true; \
+ } else { \
+ *(ovalp) = __val; \
+ __ret = false; \
+ } \
__ret; \
})
+#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
+({ \
+ typeof(pcp) __old = (oval); \
+ raw_cpu_generic_try_cmpxchg(pcp, &__old, nval); \
+ __old; \
+})
+
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
typeof(pcp1) *__p1 = raw_cpu_ptr(&(pcp1)); \
@@ -170,6 +191,16 @@ do { \
__ret; \
})
+#define this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) \
+({ \
+ bool __ret; \
+ unsigned long __flags; \
+ raw_local_irq_save(__flags); \
+ __ret = raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval); \
+ raw_local_irq_restore(__flags); \
+ __ret; \
+})
+
#define this_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
typeof(pcp) __ret; \
@@ -282,6 +313,43 @@ do { \
#define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
#endif
+#ifndef raw_cpu_try_cmpxchg_1
+#ifdef raw_cpu_cmpxchg_1
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+ __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_1)
+#else
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+ raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef raw_cpu_try_cmpxchg_2
+#ifdef raw_cpu_cmpxchg_2
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+ __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_2)
+#else
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+ raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef raw_cpu_try_cmpxchg_4
+#ifdef raw_cpu_cmpxchg_4
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+ __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_4)
+#else
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+ raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef raw_cpu_try_cmpxchg_8
+#ifdef raw_cpu_cmpxchg_8
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+ __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_8)
+#else
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+ raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+
#ifndef raw_cpu_cmpxchg_1
#define raw_cpu_cmpxchg_1(pcp, oval, nval) \
raw_cpu_generic_cmpxchg(pcp, oval, nval)
@@ -407,6 +475,43 @@ do { \
#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval)
#endif
+#ifndef this_cpu_try_cmpxchg_1
+#ifdef this_cpu_cmpxchg_1
+#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+ __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_1)
+#else
+#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+ this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef this_cpu_try_cmpxchg_2
+#ifdef this_cpu_cmpxchg_2
+#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+ __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_2)
+#else
+#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+ this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef this_cpu_try_cmpxchg_4
+#ifdef this_cpu_cmpxchg_4
+#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+ __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_4)
+#else
+#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+ this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef this_cpu_try_cmpxchg_8
+#ifdef this_cpu_cmpxchg_8
+#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+ __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_8)
+#else
+#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+ this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+
#ifndef this_cpu_cmpxchg_1
#define this_cpu_cmpxchg_1(pcp, oval, nval) \
this_cpu_generic_cmpxchg(pcp, oval, nval)
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -343,6 +343,21 @@ static __always_inline void __this_cpu_p
pscr2_ret__; \
})
+#define __pcpu_size_call_return2bool(stem, variable, ...) \
+({ \
+ bool pscr2_ret__; \
+ __verify_pcpu_ptr(&(variable)); \
+ switch(sizeof(variable)) { \
+ case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
+ case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \
+ case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \
+ case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
+ default: \
+ __bad_size_call_parameter(); break; \
+ } \
+ pscr2_ret__; \
+})
+
/*
* Special handling for cmpxchg_double. cmpxchg_double is passed two
* percpu variables. The first has to be aligned to a double word
@@ -426,6 +441,8 @@ do { \
#define raw_cpu_xchg(pcp, nval) __pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval)
#define raw_cpu_cmpxchg(pcp, oval, nval) \
__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)
+#define raw_cpu_try_cmpxchg(pcp, ovalp, nval) \
+ __pcpu_size_call_return2bool(raw_cpu_try_cmpxchg_, pcp, ovalp, nval)
#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
@@ -513,6 +530,8 @@ do { \
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
#define this_cpu_cmpxchg(pcp, oval, nval) \
__pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval)
+#define this_cpu_try_cmpxchg(pcp, ovalp, nval) \
+ __pcpu_size_call_return2bool(this_cpu_try_cmpxchg_, pcp, ovalp, nval)
#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
__pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
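
For readers auditing the generic fallbacks above, the contract of
raw_cpu_generic_try_cmpxchg() reduces to the following plain-C sketch (a
user-space illustration assuming a single flat variable; the real macro
is typed via typeof() and addresses this CPU's instance of the variable
through raw_cpu_ptr()):

#include <stdbool.h>

static bool generic_try_cmpxchg(unsigned long *p, unsigned long *oldp,
				unsigned long new)
{
	unsigned long val = *p;

	if (val == *oldp) {
		*p = new;	/* expected value found: store */
		return true;
	}
	*oldp = val;		/* failure: report the observed value */
	return false;
}

__cpu_fallback_try_cmpxchg() reaches the same contract from the other
direction: it invokes an architecture-provided cmpxchg once and, when
the returned value differs from the expected one, writes it back through
ovalp and reports failure. this_cpu_generic_try_cmpxchg() is simply the
raw variant bracketed by raw_local_irq_save()/raw_local_irq_restore().
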
On 31.05.2023 15:08, Peter Zijlstra wrote:
> Add the try_cmpxchg() form to the per-cpu ops.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
+CC Nathan, llvm list
Hi all, this patch seems to break booting on Qualcomm ARM64 platforms
when compiled with clang (GCC works fine) for some reason:
next-20230605 - works
next-20230606 - doesn't
grev -m 1 dc4e51fd9846 on next-20230606 - works again
b4 shazam <this_msgid> -P 1-4 - still works
b4 shazam <this_msgid> -P 5 - breaks
Confirmed on at least Qualcomm QCM2290, SM8250.
Checking the serial console, it hits a BUG_ON:
[ 0.000000] ------------[ cut here ]------------
[ 0.000000] kernel BUG at mm/vmalloc.c:1638!
[ 0.000000] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
[ 0.000000] Modules linked in:
[ 0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted [snip]
[ 0.000000] Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT)
[ 0.000000] pstate: 000000c5 (nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 0.000000] pc : alloc_vmap_area+0xafc/0xb08
[ 0.000000] lr : alloc_vmap_area+0x9e4/0xb08
[ 0.000000] sp : ffffa50137f53c20
[ 0.000000] x29: ffffa50137f53c60 x28: ffffa50137f30c18 x27: 0000000000000000
[ 0.000000] x26: 0000000000007fff x25: ffff800080000000 x24: 000000000000cfff
[ 0.000000] x23: ffffffffffff8000 x22: ffffa50137fef970 x21: fffffbfff0000000
[ 0.000000] x20: ffff022982003208 x19: ffff0229820031f8 x18: ffffa50137f64f70
[ 0.000000] x17: ffffa50137fef980 x16: ffffa501375e6d08 x15: 0000000000000001
[ 0.000000] x14: ffffa5013831e1a0 x13: ffffa50137f30c18 x12: 0000000000402dc2
[ 0.000000] x11: 0000000000000000 x10: ffff022982003018 x9 : ffffa5013831e188
[ 0.000000] x8 : ffffcb55ff003228 x7 : 0000000000000000 x6 : 0000000000000048
[ 0.000000] x5 : 0000000000000000 x4 : ffffa50137f53bd0 x3 : ffffa50136490000
[ 0.000000] x2 : 0000000000000001 x1 : ffffa5013831e190 x0 : ffff022982003208
[ 0.000000] Call trace:
[ 0.000000] alloc_vmap_area+0xafc/0xb08
[ 0.000000] __get_vm_area_node+0x108/0x1e8
[ 0.000000] __vmalloc_node_range+0x1fc/0x728
[ 0.000000] __vmalloc_node+0x5c/0x70
[ 0.000000] init_IRQ+0x90/0x11c
[ 0.000000] start_kernel+0x1ac/0x3bc
[ 0.000000] __primary_switched+0xc4/0xcc
[ 0.000000] Code: f000e300 91062000 943bd9ba 17ffff8f (d4210000)
[ 0.000000] ---[ end trace 0000000000000000 ]---
[ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
Compiled with clang 15.0.7 from Arch repos, with
make ARCH=arm64 LLVM=1
Konrad
> [...]
Hi Konrad,

On Fri, Jun 09, 2023 at 06:10:38PM +0200, Konrad Dybcio wrote:
> Hi all, this patch seems to break booting on Qualcomm ARM64 platforms
> when compiled with clang (GCC works fine) for some reason:
> [...]
> Compiled with clang 15.0.7 from Arch repos, with
> make ARCH=arm64 LLVM=1

Thanks a lot for testing with LLVM, submitting this report, and doing a
bisect. I sent a patch to fix this a couple of days ago and Peter pushed
it to -tip today, so it should be in the next -next release:

https://git.kernel.org/tip/093d9b240a1fa261ff8aeb7c7cc484dedacfda53

Cheers,
Nathan
On 9.06.2023 18:13, Nathan Chancellor wrote:
> Thanks a lot for testing with LLVM, submitting this report, and doing a
> bisect.

No, thank *you* for making it even possible ;)

> I sent a patch to fix this a couple of days ago and Peter pushed
> it to -tip today, so it should be in the next -next release:
>
> https://git.kernel.org/tip/093d9b240a1fa261ff8aeb7c7cc484dedacfda53

Amazing, I can boot the most recent next-20230609 with it again!

Konrad
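
For context, the linked fix addresses a self-assignment of __old in
raw_cpu_generic_try_cmpxchg(): macro expansion is textual, so when
raw_cpu_generic_cmpxchg() passes &__old into
raw_cpu_generic_try_cmpxchg(), whose own local is also named __old, the
inner initializer *(ovalp) becomes *(&__old) and reads the
just-declared, still-uninitialized inner variable. GCC and clang may
legitimately compile such an indeterminate read differently, which fits
the GCC-boots/clang-panics symptom above. A minimal user-space sketch of
the same shadowing pattern (made-up names, deliberately simplified, not
the kernel's macros):

#include <stdbool.h>
#include <stdio.h>

/* The inner macro declares its own __old; if the caller passes &__old,
 * the initializer reads this still-uninitialized variable. */
#define try_cmpxchg_like(p, ovalp, nval)			\
({								\
	int __val = *(p), __old = *(ovalp);			\
	bool __ret = (__val == __old);				\
	if (__ret)						\
		*(p) = (nval);					\
	else							\
		*(ovalp) = __val;				\
	__ret;							\
})

#define cmpxchg_like(p, oval, nval)				\
({								\
	int __old = (oval);					\
	try_cmpxchg_like(p, &__old, nval);			\
	__old;							\
})

int main(void)
{
	int v = 1;

	/* The comparison consumes indeterminate data, so compilers are
	 * free to produce different (even seemingly working) code. */
	printf("old=%d\n", cmpxchg_like(&v, 1, 2));
	return 0;
}
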
The following commit has been merged into the locking/core branch of tip:
Commit-ID: c5c0ba953b8c969c5d51bf1c57f239866a97c47c
Gitweb: https://git.kernel.org/tip/c5c0ba953b8c969c5d51bf1c57f239866a97c47c
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 31 May 2023 15:08:38 +02:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 05 Jun 2023 09:36:36 +02:00
percpu: Add {raw,this}_cpu_try_cmpxchg()
Add the try_cmpxchg() form to the per-cpu ops.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20230531132323.587480729@infradead.org
---
include/asm-generic/percpu.h | 113 ++++++++++++++++++++++++++++++++--
include/linux/percpu-defs.h | 19 ++++++-
2 files changed, 128 insertions(+), 4 deletions(-)