Architectures are required to provide four-byte cmpxchg() and 64-bit
architectures are additionally required to provide eight-byte cmpxchg().
However, there are cases where one-byte cmpxchg() would be extremely
useful. Therefore, provide cmpxchg_emu_u8() that emulates one-byte
cmpxchg() in terms of four-byte cmpxchg().
Note that this emulations is fully ordered, and can (for example) cause
one-byte cmpxchg_relaxed() to incur the overhead of full ordering.
If this causes problems for a given architecture, that architecture is
free to provide its own lighter-weight primitives.
[ paulmck: Apply Marco Elver feedback. ]
[ paulmck: Apply kernel test robot feedback. ]
[ paulmck: Drop two-byte support per Arnd Bergmann feedback. ]
Link: https://lore.kernel.org/all/0733eb10-5e7a-4450-9b8a-527b97c842ff@paulmck-laptop/
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Marco Elver <elver@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Douglas Anderson <dianders@chromium.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: <linux-arch@vger.kernel.org>
---
arch/Kconfig | 3 +++
include/linux/cmpxchg-emu.h | 15 +++++++++++++
lib/Makefile | 1 +
lib/cmpxchg-emu.c | 45 +++++++++++++++++++++++++++++++++++++
4 files changed, 64 insertions(+)
create mode 100644 include/linux/cmpxchg-emu.h
create mode 100644 lib/cmpxchg-emu.c
diff --git a/arch/Kconfig b/arch/Kconfig
index 9f066785bb71d..284663392eef8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1609,4 +1609,7 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT
# strict alignment always, even with -falign-functions.
def_bool CC_HAS_MIN_FUNCTION_ALIGNMENT || CC_IS_CLANG
+config ARCH_NEED_CMPXCHG_1_EMU
+ bool
+
endmenu
diff --git a/include/linux/cmpxchg-emu.h b/include/linux/cmpxchg-emu.h
new file mode 100644
index 0000000000000..998deec67740a
--- /dev/null
+++ b/include/linux/cmpxchg-emu.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Emulated 1-byte and 2-byte cmpxchg operations for architectures
+ * lacking direct support for these sizes. These are implemented in terms
+ * of 4-byte cmpxchg operations.
+ *
+ * Copyright (C) 2024 Paul E. McKenney.
+ */
+
+#ifndef __LINUX_CMPXCHG_EMU_H
+#define __LINUX_CMPXCHG_EMU_H
+
+uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
+
+#endif /* __LINUX_CMPXCHG_EMU_H */
diff --git a/lib/Makefile b/lib/Makefile
index ffc6b2341b45a..cc3d52fdb477d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -236,6 +236,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_GENERIC_BUG) += bug.o
obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
+obj-$(CONFIG_ARCH_NEED_CMPXCHG_1_EMU) += cmpxchg-emu.o
obj-$(CONFIG_DYNAMIC_DEBUG_CORE) += dynamic_debug.o
#ensure exported functions have prototypes
diff --git a/lib/cmpxchg-emu.c b/lib/cmpxchg-emu.c
new file mode 100644
index 0000000000000..27f6f97cb60dd
--- /dev/null
+++ b/lib/cmpxchg-emu.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Emulated 1-byte cmpxchg operation for architectures lacking direct
+ * support for this size. This is implemented in terms of 4-byte cmpxchg
+ * operations.
+ *
+ * Copyright (C) 2024 Paul E. McKenney.
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/instrumented.h>
+#include <linux/atomic.h>
+#include <linux/panic.h>
+#include <linux/bug.h>
+#include <asm-generic/rwonce.h>
+#include <linux/cmpxchg-emu.h>
+
+union u8_32 {
+ u8 b[4];
+ u32 w;
+};
+
+/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
+uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
+{
+ u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
+ int i = ((uintptr_t)p) & 0x3;
+ union u8_32 old32;
+ union u8_32 new32;
+ u32 ret;
+
+ ret = READ_ONCE(*p32);
+ do {
+ old32.w = ret;
+ if (old32.b[i] != old)
+ return old32.b[i];
+ new32.w = old32.w;
+ new32.b[i] = new;
+ instrument_atomic_read_write(p, 1);
+ ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
+ } while (ret != old32.w);
+ return old;
+}
+EXPORT_SYMBOL_GPL(cmpxchg_emu_u8);
--
2.40.1
On Wed, May 01, 2024 at 04:01:26PM -0700, Paul E. McKenney wrote:
> Architectures are required to provide four-byte cmpxchg() and 64-bit
> architectures are additionally required to provide eight-byte cmpxchg().
> However, there are cases where one-byte cmpxchg() would be extremely
> useful. Therefore, provide cmpxchg_emu_u8() that emulates one-byte
> cmpxchg() in terms of four-byte cmpxchg().
>
> Note that this emulations is fully ordered, and can (for example) cause
> one-byte cmpxchg_relaxed() to incur the overhead of full ordering.
> If this causes problems for a given architecture, that architecture is
> free to provide its own lighter-weight primitives.
>
> [ paulmck: Apply Marco Elver feedback. ]
> [ paulmck: Apply kernel test robot feedback. ]
> [ paulmck: Drop two-byte support per Arnd Bergmann feedback. ]
>
> Link: https://lore.kernel.org/all/0733eb10-5e7a-4450-9b8a-527b97c842ff@paulmck-laptop/
>
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Acked-by: Marco Elver <elver@google.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
> Cc: Douglas Anderson <dianders@chromium.org>
> Cc: Petr Mladek <pmladek@suse.com>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: <linux-arch@vger.kernel.org>
> ---
> arch/Kconfig | 3 +++
> include/linux/cmpxchg-emu.h | 15 +++++++++++++
> lib/Makefile | 1 +
> lib/cmpxchg-emu.c | 45 +++++++++++++++++++++++++++++++++++++
> 4 files changed, 64 insertions(+)
> create mode 100644 include/linux/cmpxchg-emu.h
> create mode 100644 lib/cmpxchg-emu.c
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 9f066785bb71d..284663392eef8 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1609,4 +1609,7 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT
> # strict alignment always, even with -falign-functions.
> def_bool CC_HAS_MIN_FUNCTION_ALIGNMENT || CC_IS_CLANG
>
> +config ARCH_NEED_CMPXCHG_1_EMU
> + bool
> +
> endmenu
> diff --git a/include/linux/cmpxchg-emu.h b/include/linux/cmpxchg-emu.h
> new file mode 100644
> index 0000000000000..998deec67740a
> --- /dev/null
> +++ b/include/linux/cmpxchg-emu.h
> @@ -0,0 +1,15 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*
> + * Emulated 1-byte and 2-byte cmpxchg operations for architectures
> + * lacking direct support for these sizes. These are implemented in terms
> + * of 4-byte cmpxchg operations.
> + *
> + * Copyright (C) 2024 Paul E. McKenney.
> + */
> +
> +#ifndef __LINUX_CMPXCHG_EMU_H
> +#define __LINUX_CMPXCHG_EMU_H
> +
> +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
> +
> +#endif /* __LINUX_CMPXCHG_EMU_H */
> diff --git a/lib/Makefile b/lib/Makefile
> index ffc6b2341b45a..cc3d52fdb477d 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -236,6 +236,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
> lib-$(CONFIG_GENERIC_BUG) += bug.o
>
> obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
> +obj-$(CONFIG_ARCH_NEED_CMPXCHG_1_EMU) += cmpxchg-emu.o
>
> obj-$(CONFIG_DYNAMIC_DEBUG_CORE) += dynamic_debug.o
> #ensure exported functions have prototypes
> diff --git a/lib/cmpxchg-emu.c b/lib/cmpxchg-emu.c
> new file mode 100644
> index 0000000000000..27f6f97cb60dd
> --- /dev/null
> +++ b/lib/cmpxchg-emu.c
> @@ -0,0 +1,45 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Emulated 1-byte cmpxchg operation for architectures lacking direct
> + * support for this size. This is implemented in terms of 4-byte cmpxchg
> + * operations.
> + *
> + * Copyright (C) 2024 Paul E. McKenney.
> + */
> +
> +#include <linux/types.h>
> +#include <linux/export.h>
> +#include <linux/instrumented.h>
> +#include <linux/atomic.h>
> +#include <linux/panic.h>
> +#include <linux/bug.h>
> +#include <asm-generic/rwonce.h>
> +#include <linux/cmpxchg-emu.h>
> +
> +union u8_32 {
> + u8 b[4];
> + u32 w;
> +};
> +
> +/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> +{
> + u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> + int i = ((uintptr_t)p) & 0x3;
> + union u8_32 old32;
> + union u8_32 new32;
> + u32 ret;
> +
> + ret = READ_ONCE(*p32);
> + do {
> + old32.w = ret;
> + if (old32.b[i] != old)
> + return old32.b[i];
> + new32.w = old32.w;
> + new32.b[i] = new;
> + instrument_atomic_read_write(p, 1);
> + ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
Just out of curiosity, why is this `data_race` needed? cmpxchg is atomic
so there should be no chance for a data race?
Regards,
Boqun
> + } while (ret != old32.w);
> + return old;
> +}
> +EXPORT_SYMBOL_GPL(cmpxchg_emu_u8);
> --
> 2.40.1
>
On Mon, May 13, 2024 at 07:44:00AM -0700, Boqun Feng wrote:
> On Wed, May 01, 2024 at 04:01:26PM -0700, Paul E. McKenney wrote:
> > Architectures are required to provide four-byte cmpxchg() and 64-bit
> > architectures are additionally required to provide eight-byte cmpxchg().
> > However, there are cases where one-byte cmpxchg() would be extremely
> > useful. Therefore, provide cmpxchg_emu_u8() that emulates one-byte
> > cmpxchg() in terms of four-byte cmpxchg().
> >
> > Note that this emulations is fully ordered, and can (for example) cause
> > one-byte cmpxchg_relaxed() to incur the overhead of full ordering.
> > If this causes problems for a given architecture, that architecture is
> > free to provide its own lighter-weight primitives.
> >
> > [ paulmck: Apply Marco Elver feedback. ]
> > [ paulmck: Apply kernel test robot feedback. ]
> > [ paulmck: Drop two-byte support per Arnd Bergmann feedback. ]
> >
> > Link: https://lore.kernel.org/all/0733eb10-5e7a-4450-9b8a-527b97c842ff@paulmck-laptop/
> >
> > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > Acked-by: Marco Elver <elver@google.com>
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
> > Cc: Douglas Anderson <dianders@chromium.org>
> > Cc: Petr Mladek <pmladek@suse.com>
> > Cc: Arnd Bergmann <arnd@arndb.de>
> > Cc: <linux-arch@vger.kernel.org>
> > ---
> > arch/Kconfig | 3 +++
> > include/linux/cmpxchg-emu.h | 15 +++++++++++++
> > lib/Makefile | 1 +
> > lib/cmpxchg-emu.c | 45 +++++++++++++++++++++++++++++++++++++
> > 4 files changed, 64 insertions(+)
> > create mode 100644 include/linux/cmpxchg-emu.h
> > create mode 100644 lib/cmpxchg-emu.c
> >
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index 9f066785bb71d..284663392eef8 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -1609,4 +1609,7 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT
> > # strict alignment always, even with -falign-functions.
> > def_bool CC_HAS_MIN_FUNCTION_ALIGNMENT || CC_IS_CLANG
> >
> > +config ARCH_NEED_CMPXCHG_1_EMU
> > + bool
> > +
> > endmenu
> > diff --git a/include/linux/cmpxchg-emu.h b/include/linux/cmpxchg-emu.h
> > new file mode 100644
> > index 0000000000000..998deec67740a
> > --- /dev/null
> > +++ b/include/linux/cmpxchg-emu.h
> > @@ -0,0 +1,15 @@
> > +/* SPDX-License-Identifier: GPL-2.0+ */
> > +/*
> > + * Emulated 1-byte and 2-byte cmpxchg operations for architectures
> > + * lacking direct support for these sizes. These are implemented in terms
> > + * of 4-byte cmpxchg operations.
> > + *
> > + * Copyright (C) 2024 Paul E. McKenney.
> > + */
> > +
> > +#ifndef __LINUX_CMPXCHG_EMU_H
> > +#define __LINUX_CMPXCHG_EMU_H
> > +
> > +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
> > +
> > +#endif /* __LINUX_CMPXCHG_EMU_H */
> > diff --git a/lib/Makefile b/lib/Makefile
> > index ffc6b2341b45a..cc3d52fdb477d 100644
> > --- a/lib/Makefile
> > +++ b/lib/Makefile
> > @@ -236,6 +236,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
> > lib-$(CONFIG_GENERIC_BUG) += bug.o
> >
> > obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
> > +obj-$(CONFIG_ARCH_NEED_CMPXCHG_1_EMU) += cmpxchg-emu.o
> >
> > obj-$(CONFIG_DYNAMIC_DEBUG_CORE) += dynamic_debug.o
> > #ensure exported functions have prototypes
> > diff --git a/lib/cmpxchg-emu.c b/lib/cmpxchg-emu.c
> > new file mode 100644
> > index 0000000000000..27f6f97cb60dd
> > --- /dev/null
> > +++ b/lib/cmpxchg-emu.c
> > @@ -0,0 +1,45 @@
> > +// SPDX-License-Identifier: GPL-2.0+
> > +/*
> > + * Emulated 1-byte cmpxchg operation for architectures lacking direct
> > + * support for this size. This is implemented in terms of 4-byte cmpxchg
> > + * operations.
> > + *
> > + * Copyright (C) 2024 Paul E. McKenney.
> > + */
> > +
> > +#include <linux/types.h>
> > +#include <linux/export.h>
> > +#include <linux/instrumented.h>
> > +#include <linux/atomic.h>
> > +#include <linux/panic.h>
> > +#include <linux/bug.h>
> > +#include <asm-generic/rwonce.h>
> > +#include <linux/cmpxchg-emu.h>
> > +
> > +union u8_32 {
> > + u8 b[4];
> > + u32 w;
> > +};
> > +
> > +/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> > +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > +{
> > + u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> > + int i = ((uintptr_t)p) & 0x3;
> > + union u8_32 old32;
> > + union u8_32 new32;
> > + u32 ret;
> > +
> > + ret = READ_ONCE(*p32);
> > + do {
> > + old32.w = ret;
> > + if (old32.b[i] != old)
> > + return old32.b[i];
> > + new32.w = old32.w;
> > + new32.b[i] = new;
> > + instrument_atomic_read_write(p, 1);
> > + ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
>
> Just out of curiosity, why is this `data_race` needed? cmpxchg is atomic
> so there should be no chance for a data race?
That is what I thought, too. ;-)
The problem is that the cmpxchg() covers 32 bits, and so without that
data_race(), KCSAN would complain about data races with perfectly
legitimate concurrent accesses to the other three bytes.
The instrument_atomic_read_write(p, 1) beforehand tells KCSAN to complain
about concurrent accesses, but only to that one byte.
Thanx, Paul
> Regards,
> Boqun
>
> > + } while (ret != old32.w);
> > + return old;
> > +}
> > +EXPORT_SYMBOL_GPL(cmpxchg_emu_u8);
> > --
> > 2.40.1
> >
On Mon, May 13, 2024 at 08:41:27AM -0700, Paul E. McKenney wrote:
[...]
> > > +#include <linux/types.h>
> > > +#include <linux/export.h>
> > > +#include <linux/instrumented.h>
> > > +#include <linux/atomic.h>
> > > +#include <linux/panic.h>
> > > +#include <linux/bug.h>
> > > +#include <asm-generic/rwonce.h>
> > > +#include <linux/cmpxchg-emu.h>
> > > +
> > > +union u8_32 {
> > > + u8 b[4];
> > > + u32 w;
> > > +};
> > > +
> > > +/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> > > +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > > +{
> > > + u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> > > + int i = ((uintptr_t)p) & 0x3;
> > > + union u8_32 old32;
> > > + union u8_32 new32;
> > > + u32 ret;
> > > +
> > > + ret = READ_ONCE(*p32);
> > > + do {
> > > + old32.w = ret;
> > > + if (old32.b[i] != old)
> > > + return old32.b[i];
> > > + new32.w = old32.w;
> > > + new32.b[i] = new;
> > > + instrument_atomic_read_write(p, 1);
> > > + ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
> >
> > Just out of curiosity, why is this `data_race` needed? cmpxchg is atomic
> > so there should be no chance for a data race?
>
> That is what I thought, too. ;-)
>
> The problem is that the cmpxchg() covers 32 bits, and so without that
> data_race(), KCSAN would complain about data races with perfectly
> legitimate concurrent accesses to the other three bytes.
>
> The instrument_atomic_read_write(p, 1) beforehand tells KCSAN to complain
> about concurrent accesses, but only to that one byte.
>
Oh, I see. For that purpose, maybe we can just use raw_cmpxchg() here,
i.e. a cmpxchg() without any instrument in it. Cc Mark in case I'm
missing something.
Regards,
Boqun
> Thanx, Paul
>
On Mon, May 13, 2024 at 08:57:16AM -0700, Boqun Feng wrote:
> On Mon, May 13, 2024 at 08:41:27AM -0700, Paul E. McKenney wrote:
> [...]
> > > > +#include <linux/types.h>
> > > > +#include <linux/export.h>
> > > > +#include <linux/instrumented.h>
> > > > +#include <linux/atomic.h>
> > > > +#include <linux/panic.h>
> > > > +#include <linux/bug.h>
> > > > +#include <asm-generic/rwonce.h>
> > > > +#include <linux/cmpxchg-emu.h>
> > > > +
> > > > +union u8_32 {
> > > > + u8 b[4];
> > > > + u32 w;
> > > > +};
> > > > +
> > > > +/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> > > > +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > > > +{
> > > > + u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> > > > + int i = ((uintptr_t)p) & 0x3;
> > > > + union u8_32 old32;
> > > > + union u8_32 new32;
> > > > + u32 ret;
> > > > +
> > > > + ret = READ_ONCE(*p32);
> > > > + do {
> > > > + old32.w = ret;
> > > > + if (old32.b[i] != old)
> > > > + return old32.b[i];
> > > > + new32.w = old32.w;
> > > > + new32.b[i] = new;
> > > > + instrument_atomic_read_write(p, 1);
> > > > + ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
> > >
> > > Just out of curiosity, why is this `data_race` needed? cmpxchg is atomic
> > > so there should be no chance for a data race?
> >
> > That is what I thought, too. ;-)
> >
> > The problem is that the cmpxchg() covers 32 bits, and so without that
> > data_race(), KCSAN would complain about data races with perfectly
> > legitimate concurrent accesses to the other three bytes.
> >
> > The instrument_atomic_read_write(p, 1) beforehand tells KCSAN to complain
> > about concurrent accesses, but only to that one byte.
> >
>
> Oh, I see. For that purpose, maybe we can just use raw_cmpxchg() here,
> i.e. a cmpxchg() without any instrument in it. Cc Mark in case I'm
> missing something.
>
I just realized that the KCSAN instrumentation is already done in
cmpxchg() layer:
#define cmpxchg(ptr, ...) \
({ \
typeof(ptr) __ai_ptr = (ptr); \
kcsan_mb(); \
instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \
raw_cmpxchg(__ai_ptr, __VA_ARGS__); \
})
and, this function is lower in the layer, so it shouldn't have the
instrumentation itself. How about the following (based on today's RCU
dev branch)?
Regards,
Boqun
-------------------------------------------->8
Subject: [PATCH] lib: cmpxchg-emu: Make cmpxchg_emu_u8() noinstr
Currently, cmpxchg_emu_u8() is called via cmpxchg() or raw_cmpxchg()
which already makes the instrumentation decision:
* cmpxchg() case:
cmpxchg():
kcsan_mb();
instrument_atomic_read_write(...);
raw_cmpxchg():
arch_cmpxchg():
cmpxchg_emu_u8();
... should have KCSAN instrumentation.
* raw_cmpxchg() case:
raw_cmpxchg():
arch_cmpxchg():
cmpxchg_emu_u8();
... shouldn't have KCSAN instrumentation.
Therefore it's redundant to put KCSAN instrumentation in
cmpxchg_emu_u8() (along with the data_race() to get away the
instrumentation).
So make cmpxchg_emu_u8() a noinstr function, and remove the KCSAN
instrumentation inside it.
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
---
include/linux/cmpxchg-emu.h | 4 +++-
lib/cmpxchg-emu.c | 14 ++++++++++----
2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/include/linux/cmpxchg-emu.h b/include/linux/cmpxchg-emu.h
index 998deec67740..c4c85f41d9f4 100644
--- a/include/linux/cmpxchg-emu.h
+++ b/include/linux/cmpxchg-emu.h
@@ -10,6 +10,8 @@
#ifndef __LINUX_CMPXCHG_EMU_H
#define __LINUX_CMPXCHG_EMU_H
-uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
+#include <linux/compiler.h>
+
+noinstr uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
#endif /* __LINUX_CMPXCHG_EMU_H */
diff --git a/lib/cmpxchg-emu.c b/lib/cmpxchg-emu.c
index 27f6f97cb60d..788c22cd4462 100644
--- a/lib/cmpxchg-emu.c
+++ b/lib/cmpxchg-emu.c
@@ -21,8 +21,13 @@ union u8_32 {
u32 w;
};
-/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
-uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
+/*
+ * Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg.
+ *
+ * This function is marked as 'noinstr' as the instrumentation should be done at
+ * outer layer.
+ */
+noinstr uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
{
u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
int i = ((uintptr_t)p) & 0x3;
@@ -37,8 +42,9 @@ uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
return old32.b[i];
new32.w = old32.w;
new32.b[i] = new;
- instrument_atomic_read_write(p, 1);
- ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
+
+ // raw_cmpxchg() is used here to avoid instrumentation.
+ ret = raw_cmpxchg(p32, old32.w, new32.w); // Overridden above.
} while (ret != old32.w);
return old;
}
--
2.44.0
On Mon, May 13, 2024 at 02:19:37PM -0700, Boqun Feng wrote:
> On Mon, May 13, 2024 at 08:57:16AM -0700, Boqun Feng wrote:
> > On Mon, May 13, 2024 at 08:41:27AM -0700, Paul E. McKenney wrote:
> > [...]
> > > > > +#include <linux/types.h>
> > > > > +#include <linux/export.h>
> > > > > +#include <linux/instrumented.h>
> > > > > +#include <linux/atomic.h>
> > > > > +#include <linux/panic.h>
> > > > > +#include <linux/bug.h>
> > > > > +#include <asm-generic/rwonce.h>
> > > > > +#include <linux/cmpxchg-emu.h>
> > > > > +
> > > > > +union u8_32 {
> > > > > + u8 b[4];
> > > > > + u32 w;
> > > > > +};
> > > > > +
> > > > > +/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> > > > > +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > > > > +{
> > > > > + u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> > > > > + int i = ((uintptr_t)p) & 0x3;
> > > > > + union u8_32 old32;
> > > > > + union u8_32 new32;
> > > > > + u32 ret;
> > > > > +
> > > > > + ret = READ_ONCE(*p32);
> > > > > + do {
> > > > > + old32.w = ret;
> > > > > + if (old32.b[i] != old)
> > > > > + return old32.b[i];
> > > > > + new32.w = old32.w;
> > > > > + new32.b[i] = new;
> > > > > + instrument_atomic_read_write(p, 1);
> > > > > + ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
> > > >
> > > > Just out of curiosity, why is this `data_race` needed? cmpxchg is atomic
> > > > so there should be no chance for a data race?
> > >
> > > That is what I thought, too. ;-)
> > >
> > > The problem is that the cmpxchg() covers 32 bits, and so without that
> > > data_race(), KCSAN would complain about data races with perfectly
> > > legitimate concurrent accesses to the other three bytes.
> > >
> > > The instrument_atomic_read_write(p, 1) beforehand tells KCSAN to complain
> > > about concurrent accesses, but only to that one byte.
> > >
> >
> > Oh, I see. For that purpose, maybe we can just use raw_cmpxchg() here,
> > i.e. a cmpxchg() without any instrument in it. Cc Mark in case I'm
> > missing something.
> >
>
> I just realized that the KCSAN instrumentation is already done in
> cmpxchg() layer:
>
> #define cmpxchg(ptr, ...) \
> ({ \
> typeof(ptr) __ai_ptr = (ptr); \
> kcsan_mb(); \
> instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \
> raw_cmpxchg(__ai_ptr, __VA_ARGS__); \
> })
>
> and, this function is lower in the layer, so it shouldn't have the
> instrumentation itself. How about the following (based on today's RCU
> dev branch)?
The raw_cmpxchg() looks nicer than the added data_race()!
One question below, though.
Thanx, Paul
> Regards,
> Boqun
>
> -------------------------------------------->8
> Subject: [PATCH] lib: cmpxchg-emu: Make cmpxchg_emu_u8() noinstr
>
> Currently, cmpxchg_emu_u8() is called via cmpxchg() or raw_cmpxchg()
> which already makes the instrumentation decision:
>
> * cmpxchg() case:
>
> cmpxchg():
> kcsan_mb();
> instrument_atomic_read_write(...);
> raw_cmpxchg():
> arch_cmpxchg():
> cmpxchg_emu_u8();
>
> ... should have KCSAN instrumentation.
>
> * raw_cmpxchg() case:
>
> raw_cmpxchg():
> arch_cmpxchg():
> cmpxchg_emu_u8();
>
> ... shouldn't have KCSAN instrumentation.
>
> Therefore it's redundant to put KCSAN instrumentation in
> cmpxchg_emu_u8() (along with the data_race() to get away the
> instrumentation).
>
> So make cmpxchg_emu_u8() a noinstr function, and remove the KCSAN
> instrumentation inside it.
>
> Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> ---
> include/linux/cmpxchg-emu.h | 4 +++-
> lib/cmpxchg-emu.c | 14 ++++++++++----
> 2 files changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/cmpxchg-emu.h b/include/linux/cmpxchg-emu.h
> index 998deec67740..c4c85f41d9f4 100644
> --- a/include/linux/cmpxchg-emu.h
> +++ b/include/linux/cmpxchg-emu.h
> @@ -10,6 +10,8 @@
> #ifndef __LINUX_CMPXCHG_EMU_H
> #define __LINUX_CMPXCHG_EMU_H
>
> -uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
> +#include <linux/compiler.h>
> +
> +noinstr uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
>
> #endif /* __LINUX_CMPXCHG_EMU_H */
> diff --git a/lib/cmpxchg-emu.c b/lib/cmpxchg-emu.c
> index 27f6f97cb60d..788c22cd4462 100644
> --- a/lib/cmpxchg-emu.c
> +++ b/lib/cmpxchg-emu.c
> @@ -21,8 +21,13 @@ union u8_32 {
> u32 w;
> };
>
> -/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> -uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> +/*
> + * Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg.
> + *
> + * This function is marked as 'noinstr' as the instrumentation should be done at
> + * outer layer.
> + */
> +noinstr uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> {
> u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> int i = ((uintptr_t)p) & 0x3;
> @@ -37,8 +42,9 @@ uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> return old32.b[i];
> new32.w = old32.w;
> new32.b[i] = new;
> - instrument_atomic_read_write(p, 1);
Don't we need to keep that instrument_atomic_read_write() in order
to allow KCSAN to detect data races with plain C-language reads?
Or is that being handled some other way?
> - ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
> +
> + // raw_cmpxchg() is used here to avoid instrumentation.
> + ret = raw_cmpxchg(p32, old32.w, new32.w); // Overridden above.
> } while (ret != old32.w);
> return old;
> }
> --
> 2.44.0
>
On Tue, May 14, 2024 at 07:22:47AM -0700, Paul E. McKenney wrote:
> On Mon, May 13, 2024 at 02:19:37PM -0700, Boqun Feng wrote:
> > On Mon, May 13, 2024 at 08:57:16AM -0700, Boqun Feng wrote:
> > > On Mon, May 13, 2024 at 08:41:27AM -0700, Paul E. McKenney wrote:
> > > [...]
> > > > > > +#include <linux/types.h>
> > > > > > +#include <linux/export.h>
> > > > > > +#include <linux/instrumented.h>
> > > > > > +#include <linux/atomic.h>
> > > > > > +#include <linux/panic.h>
> > > > > > +#include <linux/bug.h>
> > > > > > +#include <asm-generic/rwonce.h>
> > > > > > +#include <linux/cmpxchg-emu.h>
> > > > > > +
> > > > > > +union u8_32 {
> > > > > > + u8 b[4];
> > > > > > + u32 w;
> > > > > > +};
> > > > > > +
> > > > > > +/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> > > > > > +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > > > > > +{
> > > > > > + u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> > > > > > + int i = ((uintptr_t)p) & 0x3;
> > > > > > + union u8_32 old32;
> > > > > > + union u8_32 new32;
> > > > > > + u32 ret;
> > > > > > +
> > > > > > + ret = READ_ONCE(*p32);
> > > > > > + do {
> > > > > > + old32.w = ret;
> > > > > > + if (old32.b[i] != old)
> > > > > > + return old32.b[i];
> > > > > > + new32.w = old32.w;
> > > > > > + new32.b[i] = new;
> > > > > > + instrument_atomic_read_write(p, 1);
> > > > > > + ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
> > > > >
> > > > > Just out of curiosity, why is this `data_race` needed? cmpxchg is atomic
> > > > > so there should be no chance for a data race?
> > > >
> > > > That is what I thought, too. ;-)
> > > >
> > > > The problem is that the cmpxchg() covers 32 bits, and so without that
> > > > data_race(), KCSAN would complain about data races with perfectly
> > > > legitimate concurrent accesses to the other three bytes.
> > > >
> > > > The instrument_atomic_read_write(p, 1) beforehand tells KCSAN to complain
> > > > about concurrent accesses, but only to that one byte.
> > > >
> > >
> > > Oh, I see. For that purpose, maybe we can just use raw_cmpxchg() here,
> > > i.e. a cmpxchg() without any instrument in it. Cc Mark in case I'm
> > > missing something.
> > >
> >
> > I just realized that the KCSAN instrumentation is already done in
> > cmpxchg() layer:
> >
> > #define cmpxchg(ptr, ...) \
> > ({ \
> > typeof(ptr) __ai_ptr = (ptr); \
> > kcsan_mb(); \
> > instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \
> > raw_cmpxchg(__ai_ptr, __VA_ARGS__); \
> > })
> >
> > and, this function is lower in the layer, so it shouldn't have the
> > instrumentation itself. How about the following (based on today's RCU
> > dev branch)?
>
> The raw_cmpxchg() looks nicer than the added data_race()!
>
> One question below, though.
>
> Thanx, Paul
>
> > Regards,
> > Boqun
> >
> > -------------------------------------------->8
> > Subject: [PATCH] lib: cmpxchg-emu: Make cmpxchg_emu_u8() noinstr
> >
> > Currently, cmpxchg_emu_u8() is called via cmpxchg() or raw_cmpxchg()
> > which already makes the instrumentation decision:
> >
> > * cmpxchg() case:
> >
> > cmpxchg():
> > kcsan_mb();
> > instrument_atomic_read_write(...);
> > raw_cmpxchg():
> > arch_cmpxchg():
> > cmpxchg_emu_u8();
> >
> > ... should have KCSAN instrumentation.
> >
> > * raw_cmpxchg() case:
> >
> > raw_cmpxchg():
> > arch_cmpxchg():
> > cmpxchg_emu_u8();
> >
> > ... shouldn't have KCSAN instrumentation.
> >
> > Therefore it's redundant to put KCSAN instrumentation in
> > cmpxchg_emu_u8() (along with the data_race() to get away the
> > instrumentation).
> >
> > So make cmpxchg_emu_u8() a noinstr function, and remove the KCSAN
> > instrumentation inside it.
> >
> > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > ---
> > include/linux/cmpxchg-emu.h | 4 +++-
> > lib/cmpxchg-emu.c | 14 ++++++++++----
> > 2 files changed, 13 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/linux/cmpxchg-emu.h b/include/linux/cmpxchg-emu.h
> > index 998deec67740..c4c85f41d9f4 100644
> > --- a/include/linux/cmpxchg-emu.h
> > +++ b/include/linux/cmpxchg-emu.h
> > @@ -10,6 +10,8 @@
> > #ifndef __LINUX_CMPXCHG_EMU_H
> > #define __LINUX_CMPXCHG_EMU_H
> >
> > -uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
> > +#include <linux/compiler.h>
> > +
> > +noinstr uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
> >
> > #endif /* __LINUX_CMPXCHG_EMU_H */
> > diff --git a/lib/cmpxchg-emu.c b/lib/cmpxchg-emu.c
> > index 27f6f97cb60d..788c22cd4462 100644
> > --- a/lib/cmpxchg-emu.c
> > +++ b/lib/cmpxchg-emu.c
> > @@ -21,8 +21,13 @@ union u8_32 {
> > u32 w;
> > };
> >
> > -/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> > -uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > +/*
> > + * Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg.
> > + *
> > + * This function is marked as 'noinstr' as the instrumentation should be done at
> > + * outer layer.
> > + */
> > +noinstr uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > {
> > u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> > int i = ((uintptr_t)p) & 0x3;
> > @@ -37,8 +42,9 @@ uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > return old32.b[i];
> > new32.w = old32.w;
> > new32.b[i] = new;
> > - instrument_atomic_read_write(p, 1);
>
> Don't we need to keep that instrument_atomic_read_write() in order
> to allow KCSAN to detect data races with plain C-language reads?
> Or is that being handled some other way?
>
I think that's already covered by the current code, cmpxchg_emu_u8() is
called from cmpxchg() macro, which has a:
instrument_atomic_read_write(p, sizeof(*p));
in it, and in the cmpxchg((u8*), ..) case, 'sizeof(*p)' is obviously 1
;-)
Regards,
Boqun
> > - ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
> > +
> > + // raw_cmpxchg() is used here to avoid instrumentation.
> > + ret = raw_cmpxchg(p32, old32.w, new32.w); // Overridden above.
> > } while (ret != old32.w);
> > return old;
> > }
> > --
> > 2.44.0
> >
On Tue, May 14, 2024 at 07:53:20AM -0700, Boqun Feng wrote:
> On Tue, May 14, 2024 at 07:22:47AM -0700, Paul E. McKenney wrote:
> > On Mon, May 13, 2024 at 02:19:37PM -0700, Boqun Feng wrote:
> > > On Mon, May 13, 2024 at 08:57:16AM -0700, Boqun Feng wrote:
> > > > On Mon, May 13, 2024 at 08:41:27AM -0700, Paul E. McKenney wrote:
> > > > [...]
> > > > > > > +#include <linux/types.h>
> > > > > > > +#include <linux/export.h>
> > > > > > > +#include <linux/instrumented.h>
> > > > > > > +#include <linux/atomic.h>
> > > > > > > +#include <linux/panic.h>
> > > > > > > +#include <linux/bug.h>
> > > > > > > +#include <asm-generic/rwonce.h>
> > > > > > > +#include <linux/cmpxchg-emu.h>
> > > > > > > +
> > > > > > > +union u8_32 {
> > > > > > > + u8 b[4];
> > > > > > > + u32 w;
> > > > > > > +};
> > > > > > > +
> > > > > > > +/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> > > > > > > +uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > > > > > > +{
> > > > > > > + u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> > > > > > > + int i = ((uintptr_t)p) & 0x3;
> > > > > > > + union u8_32 old32;
> > > > > > > + union u8_32 new32;
> > > > > > > + u32 ret;
> > > > > > > +
> > > > > > > + ret = READ_ONCE(*p32);
> > > > > > > + do {
> > > > > > > + old32.w = ret;
> > > > > > > + if (old32.b[i] != old)
> > > > > > > + return old32.b[i];
> > > > > > > + new32.w = old32.w;
> > > > > > > + new32.b[i] = new;
> > > > > > > + instrument_atomic_read_write(p, 1);
> > > > > > > + ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
> > > > > >
> > > > > > Just out of curiosity, why is this `data_race` needed? cmpxchg is atomic
> > > > > > so there should be no chance for a data race?
> > > > >
> > > > > That is what I thought, too. ;-)
> > > > >
> > > > > The problem is that the cmpxchg() covers 32 bits, and so without that
> > > > > data_race(), KCSAN would complain about data races with perfectly
> > > > > legitimate concurrent accesses to the other three bytes.
> > > > >
> > > > > The instrument_atomic_read_write(p, 1) beforehand tells KCSAN to complain
> > > > > about concurrent accesses, but only to that one byte.
> > > > >
> > > >
> > > > Oh, I see. For that purpose, maybe we can just use raw_cmpxchg() here,
> > > > i.e. a cmpxchg() without any instrument in it. Cc Mark in case I'm
> > > > missing something.
> > > >
> > >
> > > I just realized that the KCSAN instrumentation is already done in
> > > cmpxchg() layer:
> > >
> > > #define cmpxchg(ptr, ...) \
> > > ({ \
> > > typeof(ptr) __ai_ptr = (ptr); \
> > > kcsan_mb(); \
> > > instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \
> > > raw_cmpxchg(__ai_ptr, __VA_ARGS__); \
> > > })
> > >
> > > and, this function is lower in the layer, so it shouldn't have the
> > > instrumentation itself. How about the following (based on today's RCU
> > > dev branch)?
> >
> > The raw_cmpxchg() looks nicer than the added data_race()!
> >
> > One question below, though.
> >
> > Thanx, Paul
> >
> > > Regards,
> > > Boqun
> > >
> > > -------------------------------------------->8
> > > Subject: [PATCH] lib: cmpxchg-emu: Make cmpxchg_emu_u8() noinstr
> > >
> > > Currently, cmpxchg_emu_u8() is called via cmpxchg() or raw_cmpxchg()
> > > which already makes the instrumentation decision:
> > >
> > > * cmpxchg() case:
> > >
> > > cmpxchg():
> > > kcsan_mb();
> > > instrument_atomic_read_write(...);
> > > raw_cmpxchg():
> > > arch_cmpxchg():
> > > cmpxchg_emu_u8();
> > >
> > > ... should have KCSAN instrumentation.
> > >
> > > * raw_cmpxchg() case:
> > >
> > > raw_cmpxchg():
> > > arch_cmpxchg():
> > > cmpxchg_emu_u8();
> > >
> > > ... shouldn't have KCSAN instrumentation.
> > >
> > > Therefore it's redundant to put KCSAN instrumentation in
> > > cmpxchg_emu_u8() (along with the data_race() to get away the
> > > instrumentation).
> > >
> > > So make cmpxchg_emu_u8() a noinstr function, and remove the KCSAN
> > > instrumentation inside it.
> > >
> > > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > > ---
> > > include/linux/cmpxchg-emu.h | 4 +++-
> > > lib/cmpxchg-emu.c | 14 ++++++++++----
> > > 2 files changed, 13 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/include/linux/cmpxchg-emu.h b/include/linux/cmpxchg-emu.h
> > > index 998deec67740..c4c85f41d9f4 100644
> > > --- a/include/linux/cmpxchg-emu.h
> > > +++ b/include/linux/cmpxchg-emu.h
> > > @@ -10,6 +10,8 @@
> > > #ifndef __LINUX_CMPXCHG_EMU_H
> > > #define __LINUX_CMPXCHG_EMU_H
> > >
> > > -uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
> > > +#include <linux/compiler.h>
> > > +
> > > +noinstr uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
> > >
> > > #endif /* __LINUX_CMPXCHG_EMU_H */
> > > diff --git a/lib/cmpxchg-emu.c b/lib/cmpxchg-emu.c
> > > index 27f6f97cb60d..788c22cd4462 100644
> > > --- a/lib/cmpxchg-emu.c
> > > +++ b/lib/cmpxchg-emu.c
> > > @@ -21,8 +21,13 @@ union u8_32 {
> > > u32 w;
> > > };
> > >
> > > -/* Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg. */
> > > -uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > > +/*
> > > + * Emulate one-byte cmpxchg() in terms of 4-byte cmpxchg.
> > > + *
> > > + * This function is marked as 'noinstr' as the instrumentation should be done at
> > > + * outer layer.
> > > + */
> > > +noinstr uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > > {
> > > u32 *p32 = (u32 *)(((uintptr_t)p) & ~0x3);
> > > int i = ((uintptr_t)p) & 0x3;
> > > @@ -37,8 +42,9 @@ uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new)
> > > return old32.b[i];
> > > new32.w = old32.w;
> > > new32.b[i] = new;
> > > - instrument_atomic_read_write(p, 1);
> >
> > Don't we need to keep that instrument_atomic_read_write() in order
> > to allow KCSAN to detect data races with plain C-language reads?
> > Or is that being handled some other way?
> >
>
> I think that's already covered by the current code, cmpxchg_emu_u8() is
> called from cmpxchg() macro, which has a:
>
> instrument_atomic_read_write(p, sizeof(*p));
>
> in it, and in the cmpxchg((u8*), ..) case, 'sizeof(*p)' is obviously 1
> ;-)
I believe you, but could you nevertheless please test it? ;-)
Thanx, Paul
> Regards,
> Boqun
>
> > > - ret = data_race(cmpxchg(p32, old32.w, new32.w)); // Overridden above.
> > > +
> > > + // raw_cmpxchg() is used here to avoid instrumentation.
> > > + ret = raw_cmpxchg(p32, old32.w, new32.w); // Overridden above.
> > > } while (ret != old32.w);
> > > return old;
> > > }
> > > --
> > > 2.44.0
> > >
© 2016 - 2025 Red Hat, Inc.