The value preload before the cmpxchg loop does not need to be atomic,
but should use READ_ONCE to prevent the compiler from merging, refetching
or reordering the read.
This patch unifies arch_atomic{,64}_{,fetch}_{and,or,xor}() macros
between x86_32 and x86_64 targets.
No functional changes intended.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
arch/x86/include/asm/atomic.h | 8 ++++----
arch/x86/include/asm/atomic64_64.h | 20 ++++++++++----------
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 55a55ec04350..b166da21ee98 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -20,7 +20,7 @@ static __always_inline int arch_atomic_read(const atomic_t *v)
* Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here,
* it's non-inlined function that increases binary size and stack usage.
*/
- return __READ_ONCE((v)->counter);
+ return __READ_ONCE(v->counter);
}
static __always_inline void arch_atomic_set(atomic_t *v, int i)
@@ -132,7 +132,7 @@ static __always_inline void arch_atomic_and(int i, atomic_t *v)
static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v)
{
- int val = arch_atomic_read(v);
+ int val = __READ_ONCE(v->counter);
do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i));
@@ -150,7 +150,7 @@ static __always_inline void arch_atomic_or(int i, atomic_t *v)
static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v)
{
- int val = arch_atomic_read(v);
+ int val = __READ_ONCE(v->counter);
do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i));
@@ -168,7 +168,7 @@ static __always_inline void arch_atomic_xor(int i, atomic_t *v)
static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v)
{
- int val = arch_atomic_read(v);
+ int val = __READ_ONCE(v->counter);
do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i));
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 3165c0feedf7..e7b12a48fecb 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -12,7 +12,7 @@
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
- return __READ_ONCE((v)->counter);
+ return __READ_ONCE(v->counter);
}
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
@@ -126,10 +126,10 @@ static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- s64 val = arch_atomic64_read(v);
+ s64 val = __READ_ONCE(v->counter);
+
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
- do {
- } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
return val;
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
@@ -144,10 +144,10 @@ static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- s64 val = arch_atomic64_read(v);
+ s64 val = __READ_ONCE(v->counter);
+
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
- do {
- } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
return val;
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
@@ -162,10 +162,10 @@ static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- s64 val = arch_atomic64_read(v);
+ s64 val = __READ_ONCE(v->counter);
+
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
- do {
- } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
return val;
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
--
2.44.0
On Tue, Apr 09, 2024 at 12:03:54PM +0200, Uros Bizjak wrote:
> The value preload before the cmpxchg loop does not need to be atomic,
> but should use READ_ONCE to prevent compiler from merging, refetching
> or reordering the read.
>
Yes, and that's what arch_atomic_read() and arch_atomic64_read() do...
> This patch unifies arch_atomic{,64}_{,fetch}_{and,or,xor}() macros
> between x86_32 and x86_64 targets.
>
> No functional changes intended.
>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> ---
> arch/x86/include/asm/atomic.h | 8 ++++----
> arch/x86/include/asm/atomic64_64.h | 20 ++++++++++----------
> 2 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
> index 55a55ec04350..b166da21ee98 100644
> --- a/arch/x86/include/asm/atomic.h
> +++ b/arch/x86/include/asm/atomic.h
> @@ -20,7 +20,7 @@ static __always_inline int arch_atomic_read(const atomic_t *v)
> * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here,
> * it's non-inlined function that increases binary size and stack usage.
> */
> - return __READ_ONCE((v)->counter);
> + return __READ_ONCE(v->counter);
Removing the unnecessary brackets is fine, but the commit message didn't mention this.
[...]
> static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v)
> {
> - int val = arch_atomic_read(v);
> + int val = __READ_ONCE(v->counter);
This is the wrong thing to do; arch_atomic_read() already has the required
semantic, and it more clearly aligns with the use of arch_atomic_try_cmpxchg()
below. It contains the documentation regarding why we use __READ_ONCE()
specifically (which we should probably note in arch_atomic64_read()).
Please leave this as-is, and likewise for the other cases below. Similarly, the
prior patch should use arch_atomic{,64}_read() rather than using
__READ_ONCE().
[...]
> static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
> {
> - s64 val = arch_atomic64_read(v);
> + s64 val = __READ_ONCE(v->counter);
> +
> + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
>
> - do {
> - } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
> return val;
I think this reformatting is what you meant in the commit message when you said:
| This patch unifies arch_atomic{,64}_{,fetch}_{and,or,xor}() macros
| between x86_32 and x86_64 targets.
Assuming so, can you please just do that, and say:
This patch reformats the x86_64 arch_atomic{,64}_{,fetch}_{and,or,xor}()
functions to match the x86_32 versions.
Mark.
On Tue, Apr 9, 2024 at 1:07 PM Mark Rutland <mark.rutland@arm.com> wrote:
>
> On Tue, Apr 09, 2024 at 12:03:54PM +0200, Uros Bizjak wrote:
> > The value preload before the cmpxchg loop does not need to be atomic,
> > but should use READ_ONCE to prevent compiler from merging, refetching
> > or reordering the read.
> >
>
> Yes, and that's what arch_atomic_read() and arch_atomic64_read() do...
>
> > This patch unifies arch_atomic{,64}_{,fetch}_{and,or,xor}() macros
> > between x86_32 and x86_64 targets.
> >
> > No functional changes intended.
> >
> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Ingo Molnar <mingo@kernel.org>
> > Cc: Borislav Petkov <bp@alien8.de>
> > Cc: Dave Hansen <dave.hansen@linux.intel.com>
> > Cc: "H. Peter Anvin" <hpa@zytor.com>
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > ---
> > arch/x86/include/asm/atomic.h | 8 ++++----
> > arch/x86/include/asm/atomic64_64.h | 20 ++++++++++----------
> > 2 files changed, 14 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
> > index 55a55ec04350..b166da21ee98 100644
> > --- a/arch/x86/include/asm/atomic.h
> > +++ b/arch/x86/include/asm/atomic.h
> > @@ -20,7 +20,7 @@ static __always_inline int arch_atomic_read(const atomic_t *v)
> > * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here,
> > * it's non-inlined function that increases binary size and stack usage.
> > */
> > - return __READ_ONCE((v)->counter);
> > + return __READ_ONCE(v->counter);
>
> Removing the unnecessary brackets is fine, but the commit message didn't mention this.
It was just a change in passing. I didn't think it even needed a comment.
> [...]
>
> > static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v)
> > {
> > - int val = arch_atomic_read(v);
> > + int val = __READ_ONCE(v->counter);
>
> This is the wrong thing to do; arch_atomic_read() already has the required
> semantic, and it more clearly aligns with the use of arch_atomic_try_cmpxchg()
> below. It contains the documentation regarding why we use __READ_ONCE()
> specifically (which we should probably note in arch_atomic64_read()).
>
> Please leave this as-is, and likewise for the other cases below. Similarly, the
> prior patch should use arch_atomic{,_64}_read() rather than using
> __READ_ONCE().
Please note that arch_atomic64_read() implements a true 64-bit atomic read
on x86_32. I tried to bypass this using __READ_ONCE(), but your
suggestion to use arch_atomic64_read_tearable() is indeed a much
better approach.
> [...]
>
> > static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
> > {
> > - s64 val = arch_atomic64_read(v);
> > + s64 val = __READ_ONCE(v->counter);
> > +
> > + do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
> >
> > - do {
> > - } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
> > return val;
>
> I think this reformatting is what you meant in the commit message when you said:
>
> | This patch unifies arch_atomic{,64}_{,fetch}_{and,or,xor}() macros
> | between x86_32 and x86_64 targets.
Actually, it was the change to use __READ_ONCE(). The reformatting
follows the functions from atomic.h, e.g. arch_atomic_fetch_and() and
was another case of change in passing, not worth mentioning in the
commit message. I will rewrite this in v2 of the patch, so these
functions will uniformly use arch_atomic64_read_tearable().
> Assuming so, can you please just do that, and say:
>
> This patch reformats the x86_64 arch_atomic{,64}_{,fetch}_{and,or,xor}()
> functions to match the x86_32 versions.
Thanks,
Uros.
© 2016 - 2026 Red Hat, Inc.