The header was taken from Linux kernel 6.4.0-rc1.
Additionally, the following updates were made:
* add emulation of {cmp}xchg for 1- and 2-byte types
* replace tabs with spaces
* replace __* variables with *__
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V3:
- update the commit message
- add emulation of {cmp}xchg_... for 1- and 2-byte types
---
Changes in V2:
- update the comment at the top of the header.
- change xen/lib.h to xen/bug.h.
- sort inclusion of headers properly.
---
xen/arch/riscv/include/asm/cmpxchg.h | 496 +++++++++++++++++++++++++++
1 file changed, 496 insertions(+)
create mode 100644 xen/arch/riscv/include/asm/cmpxchg.h
diff --git a/xen/arch/riscv/include/asm/cmpxchg.h b/xen/arch/riscv/include/asm/cmpxchg.h
new file mode 100644
index 0000000000..916776c403
--- /dev/null
+++ b/xen/arch/riscv/include/asm/cmpxchg.h
@@ -0,0 +1,496 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2014 Regents of the University of California */
+
+#ifndef _ASM_RISCV_CMPXCHG_H
+#define _ASM_RISCV_CMPXCHG_H
+
+#include <xen/compiler.h>
+#include <xen/lib.h>
+
+#include <asm/fence.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
+#define __xchg_relaxed(ptr, new, size) \
+({ \
+ __typeof__(ptr) ptr__ = (ptr); \
+ __typeof__(new) new__ = (new); \
+ __typeof__(*(ptr)) ret__; \
+ switch (size) \
+ { \
+ case 4: \
+ asm volatile( \
+ " amoswap.w %0, %2, %1\n" \
+ : "=r" (ret__), "+A" (*ptr__) \
+ : "r" (new__) \
+ : "memory" ); \
+ break; \
+ case 8: \
+ asm volatile( \
+ " amoswap.d %0, %2, %1\n" \
+ : "=r" (ret__), "+A" (*ptr__) \
+ : "r" (new__) \
+ : "memory" ); \
+ break; \
+ default: \
+ ASSERT_UNREACHABLE(); \
+ } \
+ ret__; \
+})
+
+#define xchg_relaxed(ptr, x) \
+({ \
+ __typeof__(*(ptr)) x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_relaxed((ptr), x_, sizeof(*(ptr))); \
+})
+
+#define __xchg_acquire(ptr, new, size) \
+({ \
+ __typeof__(ptr) ptr__ = (ptr); \
+ __typeof__(new) new__ = (new); \
+ __typeof__(*(ptr)) ret__; \
+ switch (size) \
+ { \
+ case 4: \
+ asm volatile( \
+ " amoswap.w %0, %2, %1\n" \
+ RISCV_ACQUIRE_BARRIER \
+ : "=r" (ret__), "+A" (*ptr__) \
+ : "r" (new__) \
+ : "memory" ); \
+ break; \
+ case 8: \
+ asm volatile( \
+ " amoswap.d %0, %2, %1\n" \
+ RISCV_ACQUIRE_BARRIER \
+ : "=r" (ret__), "+A" (*ptr__) \
+ : "r" (new__) \
+ : "memory" ); \
+ break; \
+ default: \
+ ASSERT_UNREACHABLE(); \
+ } \
+ ret__; \
+})
+
+#define xchg_acquire(ptr, x) \
+({ \
+ __typeof__(*(ptr)) x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_acquire((ptr), x_, sizeof(*(ptr))); \
+})
+
+#define __xchg_release(ptr, new, size) \
+({ \
+ __typeof__(ptr) ptr__ = (ptr); \
+ __typeof__(new) new__ = (new); \
+ __typeof__(*(ptr)) ret__; \
+ switch (size) \
+ { \
+ case 4: \
+ asm volatile ( \
+ RISCV_RELEASE_BARRIER \
+ " amoswap.w %0, %2, %1\n" \
+ : "=r" (ret__), "+A" (*ptr__) \
+ : "r" (new__) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ( \
+ RISCV_RELEASE_BARRIER \
+ " amoswap.d %0, %2, %1\n" \
+ : "=r" (ret__), "+A" (*ptr__) \
+ : "r" (new__) \
+ : "memory"); \
+ break; \
+ default: \
+ ASSERT_UNREACHABLE(); \
+ } \
+ ret__; \
+})
+
+#define xchg_release(ptr, x) \
+({ \
+ __typeof__(*(ptr)) x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_release((ptr), x_, sizeof(*(ptr))); \
+})
+
+static always_inline uint32_t __xchg_case_4(volatile uint32_t *ptr,
+ uint32_t new)
+{
+ __typeof__(*(ptr)) ret;
+
+ asm volatile (
+ " amoswap.w.aqrl %0, %2, %1\n"
+ : "=r" (ret), "+A" (*ptr)
+ : "r" (new)
+ : "memory" );
+
+ return ret;
+}
+
+static always_inline uint64_t __xchg_case_8(volatile uint64_t *ptr,
+ uint64_t new)
+{
+ __typeof__(*(ptr)) ret;
+
+ asm volatile( \
+ " amoswap.d.aqrl %0, %2, %1\n" \
+ : "=r" (ret), "+A" (*ptr) \
+ : "r" (new) \
+ : "memory" ); \
+
+ return ret;
+}
+
+static always_inline unsigned short __cmpxchg_case_2(volatile uint32_t *ptr,
+ uint32_t old,
+ uint32_t new);
+
+static always_inline unsigned short __cmpxchg_case_1(volatile uint32_t *ptr,
+ uint32_t old,
+ uint32_t new);
+
+static inline unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
+{
+ switch (size) {
+ case 1:
+ return __cmpxchg_case_1(ptr, (uint32_t)-1, x);
+ case 2:
+ return __cmpxchg_case_2(ptr, (uint32_t)-1, x);
+ case 4:
+ return __xchg_case_4(ptr, x);
+ case 8:
+ return __xchg_case_8(ptr, x);
+ default:
+ ASSERT_UNREACHABLE();
+ }
+
+ return -1;
+}
+
+#define xchg(ptr,x) \
+({ \
+ __typeof__(*(ptr)) ret__; \
+ ret__ = (__typeof__(*(ptr))) \
+ __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \
+ ret__; \
+})
+
+#define xchg32(ptr, x) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
+ xchg((ptr), (x)); \
+})
+
+#define xchg64(ptr, x) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ xchg((ptr), (x)); \
+})
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+#define __cmpxchg_relaxed(ptr, old, new, size) \
+({ \
+ __typeof__(ptr) ptr__ = (ptr); \
+ __typeof__(*(ptr)) __old = (old); \
+ __typeof__(*(ptr)) new__ = (new); \
+ __typeof__(*(ptr)) ret__; \
+ register unsigned int __rc; \
+ switch (size) \
+ { \
+ case 4: \
+ asm volatile( \
+ "0: lr.w %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc.w %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ "1:\n" \
+ : "=&r" (ret__), "=&r" (__rc), "+A" (*ptr__) \
+ : "rJ" (__old), "rJ" (new__) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile( \
+ "0: lr.d %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc.d %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ "1:\n" \
+ : "=&r" (ret__), "=&r" (__rc), "+A" (*ptr__) \
+ : "rJ" (__old), "rJ" (new__) \
+ : "memory"); \
+ break; \
+ default: \
+ ASSERT_UNREACHABLE(); \
+ } \
+ ret__; \
+})
+
+#define cmpxchg_relaxed(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) o_ = (o); \
+ __typeof__(*(ptr)) n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
+ o_, n_, sizeof(*(ptr))); \
+})
+
+#define __cmpxchg_acquire(ptr, old, new, size) \
+({ \
+ __typeof__(ptr) ptr__ = (ptr); \
+ __typeof__(*(ptr)) __old = (old); \
+ __typeof__(*(ptr)) new__ = (new); \
+ __typeof__(*(ptr)) ret__; \
+ register unsigned int __rc; \
+ switch (size) \
+ { \
+ case 4: \
+ asm volatile( \
+ "0: lr.w %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc.w %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ RISCV_ACQUIRE_BARRIER \
+ "1:\n" \
+ : "=&r" (ret__), "=&r" (__rc), "+A" (*ptr__) \
+ : "rJ" (__old), "rJ" (new__) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile( \
+ "0: lr.d %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc.d %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ RISCV_ACQUIRE_BARRIER \
+ "1:\n" \
+ : "=&r" (ret__), "=&r" (__rc), "+A" (*ptr__) \
+ : "rJ" (__old), "rJ" (new__) \
+ : "memory"); \
+ break; \
+ default: \
+ ASSERT_UNREACHABLE(); \
+ } \
+ ret__; \
+})
+
+#define cmpxchg_acquire(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) o_ = (o); \
+ __typeof__(*(ptr)) n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), o_, n_, sizeof(*(ptr))); \
+})
+
+#define __cmpxchg_release(ptr, old, new, size) \
+({ \
+ __typeof__(ptr) ptr__ = (ptr); \
+ __typeof__(*(ptr)) __old = (old); \
+ __typeof__(*(ptr)) new__ = (new); \
+ __typeof__(*(ptr)) ret__; \
+ register unsigned int __rc; \
+ switch (size) \
+ { \
+ case 4: \
+ asm volatile ( \
+ RISCV_RELEASE_BARRIER \
+ "0: lr.w %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc.w %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ "1:\n" \
+ : "=&r" (ret__), "=&r" (__rc), "+A" (*ptr__) \
+ : "rJ" (__old), "rJ" (new__) \
+ : "memory" ); \
+ break; \
+ case 8: \
+ asm volatile ( \
+ RISCV_RELEASE_BARRIER \
+ "0: lr.d %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc.d %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ "1:\n" \
+ : "=&r" (ret__), "=&r" (__rc), "+A" (*ptr__) \
+ : "rJ" (__old), "rJ" (new__) \
+ : "memory" ); \
+ break; \
+ default: \
+ ASSERT_UNREACHABLE(); \
+ } \
+ ret__; \
+})
+
+#define cmpxchg_release(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_release((ptr), _o_, _n_, sizeof(*(ptr))); \
+})
+
+static always_inline uint32_t __cmpxchg_case_4(volatile uint32_t *ptr,
+ uint32_t old,
+ uint32_t new)
+{
+ uint32_t ret;
+ register uint32_t rc;
+
+ asm volatile (
+ "0: lr.w %0, %2\n"
+ " bne %0, %z3, 1f\n"
+ " sc.w.rl %1, %z4, %2\n"
+ " bnez %1, 0b\n"
+ " fence rw, rw\n"
+ "1:\n"
+ : "=&r" (ret), "=&r" (rc), "+A" (*ptr)
+ : "rJ" (old), "rJ" (new)
+ : "memory" );
+
+ return ret;
+}
+
+static always_inline uint64_t __cmpxchg_case_8(volatile uint64_t *ptr,
+ uint64_t old,
+ uint64_t new)
+{
+ uint64_t ret;
+ register uint32_t rc;
+
+ asm volatile(
+ "0: lr.d %0, %2\n"
+ " bne %0, %z3, 1f\n"
+ " sc.d.rl %1, %z4, %2\n"
+ " bnez %1, 0b\n"
+ " fence rw, rw\n"
+ "1:\n"
+ : "=&r" (ret), "=&r" (rc), "+A" (*ptr)
+ : "rJ" (old), "rJ" (new)
+ : "memory");
+
+ return ret;
+}
+
+#define __emulate_cmpxchg_case1_2(ptr, new, read_func, cmpxchg_func, swap_byte_mask_base)\
+({ \
+ __typeof__(*(ptr)) read_val; \
+ __typeof__(*(ptr)) swapped_new; \
+ __typeof__(*(ptr)) ret; \
+ __typeof__(*(ptr)) new_ = (__typeof__(*(ptr)))new; \
+ \
+ __typeof__(ptr) aligned_ptr = (__typeof__(ptr))((unsigned long)ptr & ~3); \
+ __typeof__(*(ptr)) mask_off = ((unsigned long)ptr & 3) * 8; \
+ __typeof__(*(ptr)) mask = \
+ (__typeof__(*(ptr)))swap_byte_mask_base << mask_off; \
+ __typeof__(*(ptr)) masked_new = (new_ << mask_off) & mask; \
+ \
+ do { \
+ read_val = read_func(aligned_ptr); \
+ swapped_new = read_val & ~mask; \
+ swapped_new |= masked_new; \
+ ret = cmpxchg_func(aligned_ptr, read_val, swapped_new); \
+ } while ( ret != read_val ); \
+ \
+ ret = MASK_EXTR(swapped_new, mask); \
+ ret; \
+})
+
+static always_inline unsigned short __cmpxchg_case_2(volatile uint32_t *ptr,
+ uint32_t old,
+ uint32_t new)
+{
+ (void) old;
+
+ if (((unsigned long)ptr & 3) == 3)
+ {
+#ifdef CONFIG_64BIT
+ return __emulate_cmpxchg_case1_2((uint64_t *)ptr, new,
+ readq, __cmpxchg_case_8, 0xffffU);
+#else
+ #error "add emulation support of cmpxchg for CONFIG_32BIT"
+#endif
+ }
+ else
+ return __emulate_cmpxchg_case1_2((uint32_t *)ptr, new,
+ readl, __cmpxchg_case_4, 0xffffU);
+}
+
+static always_inline unsigned short __cmpxchg_case_1(volatile uint32_t *ptr,
+ uint32_t old,
+ uint32_t new)
+{
+ (void) old;
+
+ return __emulate_cmpxchg_case1_2((uint32_t *)ptr, new,
+ readl, __cmpxchg_case_4, 0xffU);
+}
+
+static always_inline unsigned long __cmpxchg(volatile void *ptr,
+ unsigned long old,
+ unsigned long new,
+ int size)
+{
+ switch (size)
+ {
+ case 1:
+ return __cmpxchg_case_1(ptr, old, new);
+ case 2:
+ return __cmpxchg_case_2(ptr, old, new);
+ case 4:
+ return __cmpxchg_case_4(ptr, old, new);
+ case 8:
+ return __cmpxchg_case_8(ptr, old, new);
+ default:
+ ASSERT_UNREACHABLE();
+ }
+
+ return old;
+}
+
+#define cmpxchg(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) ret__; \
+ ret__ = (__typeof__(*(ptr))) \
+ __cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \
+ sizeof(*(ptr))); \
+ ret__; \
+})
+
+#define cmpxchg_local(ptr, o, n) \
+ (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
+
+#define cmpxchg32(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
+ cmpxchg((ptr), (o), (n)); \
+})
+
+#define cmpxchg32_local(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
+ cmpxchg_relaxed((ptr), (o), (n)) \
+})
+
+#define cmpxchg64(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg((ptr), (o), (n)); \
+})
+
+#define cmpxchg64_local(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg_relaxed((ptr), (o), (n)); \
+})
+
+#endif /* _ASM_RISCV_CMPXCHG_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--
2.43.0
On 22.12.2023 16:12, Oleksii Kurochko wrote:
> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/cmpxchg.h
> @@ -0,0 +1,496 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* Copyright (C) 2014 Regents of the University of California */
> +
> +#ifndef _ASM_RISCV_CMPXCHG_H
> +#define _ASM_RISCV_CMPXCHG_H
> +
> +#include <xen/compiler.h>
> +#include <xen/lib.h>
> +
> +#include <asm/fence.h>
> +#include <asm/io.h>
> +#include <asm/system.h>
> +
> +#define __xchg_relaxed(ptr, new, size) \
> +({ \
> + __typeof__(ptr) ptr__ = (ptr); \
> + __typeof__(new) new__ = (new); \
> + __typeof__(*(ptr)) ret__; \
I expect the types of new and *ptr want to actually match. Which
you then want to enforce, so that issues at use sites would either
be reported by the compiler, or be permitted by a type conversion
of new.
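For illustration, one way to get that enforcement (a sketch only, showing
just the 4-byte case; the macro name is made up) is to declare new__ with
the pointee's type, so the argument is converted at the initializer and
incompatible pointer/integer mixes get diagnosed at the use site:

#define __xchg_relaxed_32_sketch(ptr, new)                          \
({                                                                  \
    __typeof__(ptr) ptr__ = (ptr);                                  \
    __typeof__(*(ptr)) new__ = (new); /* was __typeof__(new) */     \
    __typeof__(*(ptr)) ret__;                                       \
    asm volatile ( "amoswap.w %0, %2, %1"                           \
                   : "=r" (ret__), "+A" (*ptr__)                    \
                   : "r" (new__)                                    \
                   : "memory" );                                    \
    ret__;                                                          \
})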
> + switch (size) \
> + { \
Nit: Hard tab left here. (Also again you want to either stick to
Linux style or fully switch to Xen style.)
> + case 4: \
> + asm volatile( \
> + " amoswap.w %0, %2, %1\n" \
I don't think a leading tab (or leading whitespace in general) is
needed in single-line-output asm()s. The trailing \n also isn't
needed if I'm not mistaken.
> + : "=r" (ret__), "+A" (*ptr__) \
> + : "r" (new__) \
> + : "memory" ); \
> + break; \
> + case 8: \
> + asm volatile( \
> + " amoswap.d %0, %2, %1\n" \
> + : "=r" (ret__), "+A" (*ptr__) \
> + : "r" (new__) \
> + : "memory" ); \
> + break; \
> + default: \
> + ASSERT_UNREACHABLE(); \
If at all possible this wants to trigger a build failure, not a runtime
one.
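One technique for that (a sketch; the helper name below is made up, though
Linux's generic cmpxchg code uses the same idea) is to let the default case
call a function that is declared but intentionally never defined, so an
unsupported size becomes a link-time error. This relies on the compiler
eliminating the default branch whenever size is a compile-time constant:

/* Declared but never defined anywhere - calling it fails at link time. */
extern unsigned long __xchg_called_with_bad_size(void);

static inline unsigned long xchg_size_check_sketch(volatile void *ptr,
                                                   unsigned long new,
                                                   int size)
{
    unsigned long ret;

    (void)ptr;

    switch ( size )
    {
    case 4:
    case 8:
        /* The real amoswap.w / amoswap.d asm would live here. */
        ret = new;
        break;
    default:
        /* Only kept by the compiler for unsupported sizes. */
        ret = __xchg_called_with_bad_size();
        break;
    }

    return ret;
}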
> + } \
> + ret__; \
> +})
> +
> +#define xchg_relaxed(ptr, x) \
> +({ \
> + __typeof__(*(ptr)) x_ = (x); \
> + (__typeof__(*(ptr))) __xchg_relaxed((ptr), x_, sizeof(*(ptr))); \
Nit: Stray blank after cast. For readability I'd also suggest to
drop parentheses in cases like the first argument passed to
__xchg_relaxed() here.
> +})
For both: What does "relaxed" describe? I'm asking because it's not
really clear whether the memory clobbers are actually needed.
> +#define __xchg_acquire(ptr, new, size) \
> +({ \
> + __typeof__(ptr) ptr__ = (ptr); \
> + __typeof__(new) new__ = (new); \
> + __typeof__(*(ptr)) ret__; \
> + switch (size) \
> + { \
> + case 4: \
> + asm volatile( \
> + " amoswap.w %0, %2, %1\n" \
> + RISCV_ACQUIRE_BARRIER \
> + : "=r" (ret__), "+A" (*ptr__) \
> + : "r" (new__) \
> + : "memory" ); \
> + break; \
> + case 8: \
> + asm volatile( \
> + " amoswap.d %0, %2, %1\n" \
> + RISCV_ACQUIRE_BARRIER \
> + : "=r" (ret__), "+A" (*ptr__) \
> + : "r" (new__) \
> + : "memory" ); \
> + break; \
> + default: \
> + ASSERT_UNREACHABLE(); \
> + } \
> + ret__; \
> +})
If I'm not mistaken this differs from __xchg_relaxed() only in the use
of RISCV_ACQUIRE_BARRIER, and ...
> +#define xchg_acquire(ptr, x) \
> +({ \
> + __typeof__(*(ptr)) x_ = (x); \
> + (__typeof__(*(ptr))) __xchg_acquire((ptr), x_, sizeof(*(ptr))); \
> +})
> +
> +#define __xchg_release(ptr, new, size) \
> +({ \
> + __typeof__(ptr) ptr__ = (ptr); \
> + __typeof__(new) new__ = (new); \
> + __typeof__(*(ptr)) ret__; \
> + switch (size) \
> + { \
> + case 4: \
> + asm volatile ( \
> + RISCV_RELEASE_BARRIER \
> + " amoswap.w %0, %2, %1\n" \
> + : "=r" (ret__), "+A" (*ptr__) \
> + : "r" (new__) \
> + : "memory"); \
> + break; \
> + case 8: \
> + asm volatile ( \
> + RISCV_RELEASE_BARRIER \
> + " amoswap.d %0, %2, %1\n" \
> + : "=r" (ret__), "+A" (*ptr__) \
> + : "r" (new__) \
> + : "memory"); \
> + break; \
> + default: \
> + ASSERT_UNREACHABLE(); \
> + } \
> + ret__; \
> +})
this only in the use of RISCV_RELEASE_BARRIER. If so they likely want
folding, to limit redundancy and make eventual updating easier. (Same
for the cmpxchg helper further down, as it seems.)
> +#define xchg_release(ptr, x) \
> +({ \
> + __typeof__(*(ptr)) x_ = (x); \
> + (__typeof__(*(ptr))) __xchg_release((ptr), x_, sizeof(*(ptr))); \
> +})
> +
> +static always_inline uint32_t __xchg_case_4(volatile uint32_t *ptr,
> + uint32_t new)
> +{
> + __typeof__(*(ptr)) ret;
> +
> + asm volatile (
> + " amoswap.w.aqrl %0, %2, %1\n"
> + : "=r" (ret), "+A" (*ptr)
> + : "r" (new)
> + : "memory" );
> +
> + return ret;
> +}
> +
> +static always_inline uint64_t __xchg_case_8(volatile uint64_t *ptr,
> + uint64_t new)
> +{
> + __typeof__(*(ptr)) ret;
> +
> + asm volatile( \
> + " amoswap.d.aqrl %0, %2, %1\n" \
> + : "=r" (ret), "+A" (*ptr) \
> + : "r" (new) \
> + : "memory" ); \
> +
> + return ret;
> +}
> +
> +static always_inline unsigned short __cmpxchg_case_2(volatile uint32_t *ptr,
> + uint32_t old,
> + uint32_t new);
Don't you consistently mean uint16_t here (incl the return type) and ...
> +static always_inline unsigned short __cmpxchg_case_1(volatile uint32_t *ptr,
> + uint32_t old,
> + uint32_t new);
... uint8_t here?
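That is, roughly (a sketch of the type-correct shape only; whether the
implementation keeps emulating these via a 32-bit access internally is a
separate matter):

static always_inline uint16_t __cmpxchg_case_2(volatile uint16_t *ptr,
                                               uint16_t old,
                                               uint16_t new);

static always_inline uint8_t __cmpxchg_case_1(volatile uint8_t *ptr,
                                              uint8_t old,
                                              uint8_t new);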
> +static inline unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
> +{
> + switch (size) {
> + case 1:
> + return __cmpxchg_case_1(ptr, (uint32_t)-1, x);
> + case 2:
> + return __cmpxchg_case_2(ptr, (uint32_t)-1, x);
How are these going to work? You'll compare against ~0, and if the value
in memory isn't ~0, memory won't be updated; you will only (correctly)
return the value found in memory.
Or wait - looking at __cmpxchg_case_{1,2}() far further down, you ignore
"old" there. Which apparently means they'll work for the use here, but
not for the use in __cmpxchg().
> + case 4:
> + return __xchg_case_4(ptr, x);
> + case 8:
> + return __xchg_case_8(ptr, x);
> + default:
> + ASSERT_UNREACHABLE();
> + }
> +
> + return -1;
> +}
> +
> +#define xchg(ptr,x) \
> +({ \
> + __typeof__(*(ptr)) ret__; \
> + ret__ = (__typeof__(*(ptr))) \
> + __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \
> + ret__; \
> +})
> +
> +#define xchg32(ptr, x) \
> +({ \
> + BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
> + xchg((ptr), (x)); \
> +})
> +
> +#define xchg64(ptr, x) \
> +({ \
> + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> + xchg((ptr), (x)); \
> +})
What are these two (and their cmpxchg counterparts) needed for?
> +/*
> + * Atomic compare and exchange. Compare OLD with MEM, if identical,
> + * store NEW in MEM. Return the initial value in MEM. Success is
> + * indicated by comparing RETURN with OLD.
> + */
> +#define __cmpxchg_relaxed(ptr, old, new, size) \
> +({ \
> + __typeof__(ptr) ptr__ = (ptr); \
> + __typeof__(*(ptr)) __old = (old); \
Leftover leading underscores?
> + __typeof__(*(ptr)) new__ = (new); \
Related to my earlier comment on types needing to be compatible - see
how here you're using "ptr" throughout.
> + __typeof__(*(ptr)) ret__; \
> + register unsigned int __rc; \
More leftover leading underscores?
> +static always_inline unsigned short __cmpxchg_case_2(volatile uint32_t *ptr,
> + uint32_t old,
> + uint32_t new)
> +{
> + (void) old;
> +
> + if (((unsigned long)ptr & 3) == 3)
> + {
> +#ifdef CONFIG_64BIT
> + return __emulate_cmpxchg_case1_2((uint64_t *)ptr, new,
> + readq, __cmpxchg_case_8, 0xffffU);
What if ((unsigned long)ptr & 7) == 7 (which is a sub-case of what the
if() above checks for? Isn't it more reasonable to require aligned
16-bit quantities here? Or if mis-aligned addresses are okay, you could
as well emulate using __cmpxchg_case_4().
Also you shouldn't be casting away volatile (here and below). Avoiding
the casts (by suitable using volatile void * parameter types) would
likely be best.
Jan
On Mon, 2024-01-22 at 17:27 +0100, Jan Beulich wrote:
> > +#define __xchg_acquire(ptr, new, size) \
> > +({ \
> > + __typeof__(ptr) ptr__ = (ptr); \
> > + __typeof__(new) new__ = (new); \
> > + __typeof__(*(ptr)) ret__; \
> > + switch (size) \
> > + { \
> > + case 4: \
> > + asm volatile( \
> > + " amoswap.w %0, %2, %1\n" \
> > + RISCV_ACQUIRE_BARRIER \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory" ); \
> > + break; \
> > + case 8: \
> > + asm volatile( \
> > + " amoswap.d %0, %2, %1\n" \
> > + RISCV_ACQUIRE_BARRIER \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory" ); \
> > + break; \
> > + default: \
> > + ASSERT_UNREACHABLE(); \
> > + } \
> > + ret__; \
> > +})
>
> If I'm not mistaken this differs from __xchg_relaxed() only in the
> use
> of RISCV_ACQUIRE_BARRIER, and ...
>
> > +#define xchg_acquire(ptr, x) \
> > +({ \
> > + __typeof__(*(ptr)) x_ = (x); \
> > + (__typeof__(*(ptr))) __xchg_acquire((ptr), x_,
> > sizeof(*(ptr))); \
> > +})
> > +
> > +#define __xchg_release(ptr, new, size) \
> > +({ \
> > + __typeof__(ptr) ptr__ = (ptr); \
> > + __typeof__(new) new__ = (new); \
> > + __typeof__(*(ptr)) ret__; \
> > + switch (size) \
> > + { \
> > + case 4: \
> > + asm volatile ( \
> > + RISCV_RELEASE_BARRIER \
> > + " amoswap.w %0, %2, %1\n" \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory"); \
> > + break; \
> > + case 8: \
> > + asm volatile ( \
> > + RISCV_RELEASE_BARRIER \
> > + " amoswap.d %0, %2, %1\n" \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory"); \
> > + break; \
> > + default: \
> > + ASSERT_UNREACHABLE(); \
> > + } \
> > + ret__; \
> > +})
>
> this only in the use of RISCV_RELEASE_BARRIER. If so they likely want
> folding, to limit redundancy and make eventual updating easier. (Same
> for the cmpxchg helper further down, as it seems.)
Also the difference is in where to place the barrier, before or after the
atomic instruction. I am not sure that we can easily fold these macros.
~ Oleksii
On 30.01.2024 15:57, Oleksii wrote:
> On Mon, 2024-01-22 at 17:27 +0100, Jan Beulich wrote:
>>> +#define __xchg_acquire(ptr, new, size) \
>>> +({ \
>>> + __typeof__(ptr) ptr__ = (ptr); \
>>> + __typeof__(new) new__ = (new); \
>>> + __typeof__(*(ptr)) ret__; \
>>> + switch (size) \
>>> + { \
>>> + case 4: \
>>> + asm volatile( \
>>> + " amoswap.w %0, %2, %1\n" \
>>> + RISCV_ACQUIRE_BARRIER \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory" ); \
>>> + break; \
>>> + case 8: \
>>> + asm volatile( \
>>> + " amoswap.d %0, %2, %1\n" \
>>> + RISCV_ACQUIRE_BARRIER \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory" ); \
>>> + break; \
>>> + default: \
>>> + ASSERT_UNREACHABLE(); \
>>> + } \
>>> + ret__; \
>>> +})
>>
>> If I'm not mistaken this differs from __xchg_relaxed() only in the
>> use
>> of RISCV_ACQUIRE_BARRIER, and ...
>>
>>> +#define xchg_acquire(ptr, x) \
>>> +({ \
>>> + __typeof__(*(ptr)) x_ = (x); \
>>> + (__typeof__(*(ptr))) __xchg_acquire((ptr), x_,
>>> sizeof(*(ptr))); \
>>> +})
>>> +
>>> +#define __xchg_release(ptr, new, size) \
>>> +({ \
>>> + __typeof__(ptr) ptr__ = (ptr); \
>>> + __typeof__(new) new__ = (new); \
>>> + __typeof__(*(ptr)) ret__; \
>>> + switch (size) \
>>> + { \
>>> + case 4: \
>>> + asm volatile ( \
>>> + RISCV_RELEASE_BARRIER \
>>> + " amoswap.w %0, %2, %1\n" \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory"); \
>>> + break; \
>>> + case 8: \
>>> + asm volatile ( \
>>> + RISCV_RELEASE_BARRIER \
>>> + " amoswap.d %0, %2, %1\n" \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory"); \
>>> + break; \
>>> + default: \
>>> + ASSERT_UNREACHABLE(); \
>>> + } \
>>> + ret__; \
>>> +})
>>
>> this only in the use of RISCV_RELEASE_BARRIER. If so they likely want
>> folding, to limit redundancy and make eventual updating easier. (Same
>> for the cmpxchg helper further down, as it seems.)
> Also the difference is in where to place the barrier, before or after the
> atomic instruction. I am not sure that we can easily fold these macros.
The folded macro would have two barrier parameters - one for acquire, one
for release.
Jan
On Tue, 2024-01-30 at 16:05 +0100, Jan Beulich wrote:
> On 30.01.2024 15:57, Oleksii wrote:
> > On Mon, 2024-01-22 at 17:27 +0100, Jan Beulich wrote:
> > > > +#define __xchg_acquire(ptr, new, size) \
> > > > +({ \
> > > > + __typeof__(ptr) ptr__ = (ptr); \
> > > > + __typeof__(new) new__ = (new); \
> > > > + __typeof__(*(ptr)) ret__; \
> > > > + switch (size) \
> > > > + { \
> > > > + case 4: \
> > > > + asm volatile( \
> > > > + " amoswap.w %0, %2, %1\n" \
> > > > + RISCV_ACQUIRE_BARRIER \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory" ); \
> > > > + break; \
> > > > + case 8: \
> > > > + asm volatile( \
> > > > + " amoswap.d %0, %2, %1\n" \
> > > > + RISCV_ACQUIRE_BARRIER \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory" ); \
> > > > + break; \
> > > > + default: \
> > > > + ASSERT_UNREACHABLE(); \
> > > > + } \
> > > > + ret__; \
> > > > +})
> > >
> > > If I'm not mistaken this differs from __xchg_relaxed() only in
> > > the
> > > use
> > > of RISCV_ACQUIRE_BARRIER, and ...
> > >
> > > > +#define xchg_acquire(ptr, x) \
> > > > +({ \
> > > > + __typeof__(*(ptr)) x_ = (x); \
> > > > + (__typeof__(*(ptr))) __xchg_acquire((ptr), x_,
> > > > sizeof(*(ptr))); \
> > > > +})
> > > > +
> > > > +#define __xchg_release(ptr, new, size) \
> > > > +({ \
> > > > + __typeof__(ptr) ptr__ = (ptr); \
> > > > + __typeof__(new) new__ = (new); \
> > > > + __typeof__(*(ptr)) ret__; \
> > > > + switch (size) \
> > > > + { \
> > > > + case 4: \
> > > > + asm volatile ( \
> > > > + RISCV_RELEASE_BARRIER \
> > > > + " amoswap.w %0, %2, %1\n" \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory"); \
> > > > + break; \
> > > > + case 8: \
> > > > + asm volatile ( \
> > > > + RISCV_RELEASE_BARRIER \
> > > > + " amoswap.d %0, %2, %1\n" \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory"); \
> > > > + break; \
> > > > + default: \
> > > > + ASSERT_UNREACHABLE(); \
> > > > + } \
> > > > + ret__; \
> > > > +})
> > >
> > > this only in the use of RISCV_RELEASE_BARRIER. If so they likely
> > > want
> > > folding, to limit redundancy and make eventual updating easier.
> > > (Same
> > > for the cmpxchg helper further down, as it seems.)
> > Also the difference is in where to place the barrier, before or after
> > the atomic instruction. I am not sure that we can easily fold these
> > macros.
>
> The folded macro would have two barrier parameters - one for acquire,
> one for release.
Yes, in such a case it will work.
Thanks.
~ Oleksii
On Mon, 2024-01-22 at 17:27 +0100, Jan Beulich wrote:
> On 22.12.2023 16:12, Oleksii Kurochko wrote:
> > --- /dev/null
> > +++ b/xen/arch/riscv/include/asm/cmpxchg.h
> > @@ -0,0 +1,496 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/* Copyright (C) 2014 Regents of the University of California */
> > +
> > +#ifndef _ASM_RISCV_CMPXCHG_H
> > +#define _ASM_RISCV_CMPXCHG_H
> > +
> > +#include <xen/compiler.h>
> > +#include <xen/lib.h>
> > +
> > +#include <asm/fence.h>
> > +#include <asm/io.h>
> > +#include <asm/system.h>
> > +
> > +#define __xchg_relaxed(ptr, new, size) \
> > +({ \
> > + __typeof__(ptr) ptr__ = (ptr); \
> > + __typeof__(new) new__ = (new); \
> > + __typeof__(*(ptr)) ret__; \
>
> I expect the types of new and *ptr want to actually match. Which
> you then want to enforce, so that issues at use sites would either
> be reported by the compiler, or be permitted by a type conversion
> of new.
I am OK with making new and ret__ the same type, but it looks like
__xchg_relaxed() is used only inside xchg_relaxed(), where 'new' is
declared with the same type as *ptr.
>
> > + switch (size) \
> > + { \
>
> Nit: Hard tab left here. (Also again you want to either stick to
> Linux style or fully switch to Xen style.)
Thanks. I'll update that.
>
> > + case 4: \
> > + asm volatile( \
> > + " amoswap.w %0, %2, %1\n" \
>
> I don't think a leading tab (or leading whitespace in general) is
> needed in single-line-output asm()s. The trailing \n also isn't
> needed if I'm not mistaken.
I just wanted to align it with "=r", but for sure a leading tab or
whitespace can be dropped.
>
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory" ); \
> > + break; \
> > + case 8: \
> > + asm volatile( \
> > + " amoswap.d %0, %2, %1\n" \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory" ); \
> > + break; \
> > + default: \
> > + ASSERT_UNREACHABLE(); \
>
> If at all possible this wants to trigger a build failure, not a
> runtime
> one.
I'll update that with BUILD_BUG_ON(size > 8);
>
> > + } \
> > + ret__; \
> > +})
> > +
> > +#define xchg_relaxed(ptr, x) \
> > +({ \
> > + __typeof__(*(ptr)) x_ = (x); \
> > + (__typeof__(*(ptr))) __xchg_relaxed((ptr), x_,
> > sizeof(*(ptr))); \
>
> Nit: Stray blank after cast. For readability I'd also suggest to
> drop parentheses in cases like the first argument passed to
> __xchg_relaxed() here.
Thanks. I'll take that into account.
>
> > +})
>
> For both: What does "relaxed" describe? I'm asking because it's not
> really clear whether the memory clobbers are actually needed.
>
> > +#define __xchg_acquire(ptr, new, size) \
> > +({ \
> > + __typeof__(ptr) ptr__ = (ptr); \
> > + __typeof__(new) new__ = (new); \
> > + __typeof__(*(ptr)) ret__; \
> > + switch (size) \
> > + { \
> > + case 4: \
> > + asm volatile( \
> > + " amoswap.w %0, %2, %1\n" \
> > + RISCV_ACQUIRE_BARRIER \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory" ); \
> > + break; \
> > + case 8: \
> > + asm volatile( \
> > + " amoswap.d %0, %2, %1\n" \
> > + RISCV_ACQUIRE_BARRIER \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory" ); \
> > + break; \
> > + default: \
> > + ASSERT_UNREACHABLE(); \
> > + } \
> > + ret__; \
> > +})
>
> If I'm not mistaken this differs from __xchg_relaxed() only in the
> use
> of RISCV_ACQUIRE_BARRIER, and ...
>
> > +#define xchg_acquire(ptr, x) \
> > +({ \
> > + __typeof__(*(ptr)) x_ = (x); \
> > + (__typeof__(*(ptr))) __xchg_acquire((ptr), x_,
> > sizeof(*(ptr))); \
> > +})
> > +
> > +#define __xchg_release(ptr, new, size) \
> > +({ \
> > + __typeof__(ptr) ptr__ = (ptr); \
> > + __typeof__(new) new__ = (new); \
> > + __typeof__(*(ptr)) ret__; \
> > + switch (size) \
> > + { \
> > + case 4: \
> > + asm volatile ( \
> > + RISCV_RELEASE_BARRIER \
> > + " amoswap.w %0, %2, %1\n" \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory"); \
> > + break; \
> > + case 8: \
> > + asm volatile ( \
> > + RISCV_RELEASE_BARRIER \
> > + " amoswap.d %0, %2, %1\n" \
> > + : "=r" (ret__), "+A" (*ptr__) \
> > + : "r" (new__) \
> > + : "memory"); \
> > + break; \
> > + default: \
> > + ASSERT_UNREACHABLE(); \
> > + } \
> > + ret__; \
> > +})
>
> this only in the use of RISCV_RELEASE_BARRIER. If so they likely want
> folding, to limit redundancy and make eventual updating easier. (Same
> for the cmpxchg helper further down, as it seems.)
Yes, you are right. The difference is only in RISCV_RELEASE_BARRIER, and
the absence of that barrier is the reason why we have "relaxed" versions.
I am not sure that I understand what you mean by folding here. Do you
mean that there is no sense in having two separate macros and that only
one, with RISCV_RELEASE_BARRIER, is needed?
>
> > +#define xchg_release(ptr, x) \
> > +({ \
> > + __typeof__(*(ptr)) x_ = (x); \
> > + (__typeof__(*(ptr))) __xchg_release((ptr), x_,
> > sizeof(*(ptr))); \
> > +})
> > +
> > +static always_inline uint32_t __xchg_case_4(volatile uint32_t
> > *ptr,
> > + uint32_t new)
> > +{
> > + __typeof__(*(ptr)) ret;
> > +
> > + asm volatile (
> > + " amoswap.w.aqrl %0, %2, %1\n"
> > + : "=r" (ret), "+A" (*ptr)
> > + : "r" (new)
> > + : "memory" );
> > +
> > + return ret;
> > +}
> > +
> > +static always_inline uint64_t __xchg_case_8(volatile uint64_t
> > *ptr,
> > + uint64_t new)
> > +{
> > + __typeof__(*(ptr)) ret;
> > +
> > + asm volatile( \
> > + " amoswap.d.aqrl %0, %2, %1\n" \
> > + : "=r" (ret), "+A" (*ptr) \
> > + : "r" (new) \
> > + : "memory" ); \
> > +
> > + return ret;
> > +}
> > +
> > +static always_inline unsigned short __cmpxchg_case_2(volatile
> > uint32_t *ptr,
> > + uint32_t old,
> > + uint32_t
> > new);
>
> Don't you consistently mean uint16_t here (incl the return type) and
> ...
>
> > +static always_inline unsigned short __cmpxchg_case_1(volatile
> > uint32_t *ptr,
> > + uint32_t old,
> > + uint32_t
> > new);
>
> ... uint8_t here?
The idea was that we emulate __cmpxchg_case_1 and __cmpxchg_case_2
using 4 bytes cmpxchg and __cmpxchg_case_4 expects uint32_t.
>
> > +static inline unsigned long __xchg(volatile void *ptr, unsigned
> > long x, int size)
> > +{
> > + switch (size) {
> > + case 1:
> > + return __cmpxchg_case_1(ptr, (uint32_t)-1, x);
> > + case 2:
> > + return __cmpxchg_case_2(ptr, (uint32_t)-1, x);
>
> How are these going to work? You'll compare against ~0, and if the
> value
> in memory isn't ~0, memory won't be updated; you will only
> (correctly)
> return the value found in memory.
>
> Or wait - looking at __cmpxchg_case_{1,2}() far further down, you
> ignore
> "old" there. Which apparently means they'll work for the use here,
> but
> not for the use in __cmpxchg().
Yes, the trick is that old is ignored and is read in
__emulate_cmpxchg_case1_2() before __cmpxchg_case_4 is called:
do {
    read_val = read_func(aligned_ptr);
    swapped_new = read_val & ~mask;
    swapped_new |= masked_new;
    ret = cmpxchg_func(aligned_ptr, read_val, swapped_new);
} while ( ret != read_val );
read_val is 'old'.
But now I am not 100% sure that it is correct for __cmpxchg...
>
> > + case 4:
> > + return __xchg_case_4(ptr, x);
> > + case 8:
> > + return __xchg_case_8(ptr, x);
> > + default:
> > + ASSERT_UNREACHABLE();
> > + }
> > +
> > + return -1;
> > +}
> > +
> > +#define xchg(ptr,x) \
> > +({ \
> > + __typeof__(*(ptr)) ret__; \
> > + ret__ = (__typeof__(*(ptr))) \
> > + __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \
> > + ret__; \
> > +})
> > +
> > +#define xchg32(ptr, x) \
> > +({ \
> > + BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
> > + xchg((ptr), (x)); \
> > +})
> > +
> > +#define xchg64(ptr, x) \
> > +({ \
> > + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
> > + xchg((ptr), (x)); \
> > +})
>
> What are these two (and their cmpxchg counterparts) needed for?
They were copied from Linux, but it looks like they can really be dropped;
I can't find where they are used, so I'll drop them.
>
> > +/*
> > + * Atomic compare and exchange. Compare OLD with MEM, if
> > identical,
> > + * store NEW in MEM. Return the initial value in MEM. Success is
> > + * indicated by comparing RETURN with OLD.
> > + */
> > +#define __cmpxchg_relaxed(ptr, old, new, size) \
> > +({ \
> > + __typeof__(ptr) ptr__ = (ptr); \
> > + __typeof__(*(ptr)) __old = (old); \
>
> Leftover leading underscores?
>
> > + __typeof__(*(ptr)) new__ = (new); \
>
> Related to my earlier comment on types needing to be compatible - see
> how here you're using "ptr" throughout.
>
> > + __typeof__(*(ptr)) ret__; \
> > + register unsigned int __rc; \
>
> More leftover leading underscores?
I missed dropping the underscores. Thanks. I'll drop them in the next version.
>
> > +static always_inline unsigned short __cmpxchg_case_2(volatile
> > uint32_t *ptr,
> > + uint32_t old,
> > + uint32_t new)
> > +{
> > + (void) old;
> > +
> > + if (((unsigned long)ptr & 3) == 3)
> > + {
> > +#ifdef CONFIG_64BIT
> > + return __emulate_cmpxchg_case1_2((uint64_t *)ptr, new,
> > + readq, __cmpxchg_case_8,
> > 0xffffU);
>
> What if ((unsigned long)ptr & 7) == 7 (which is a sub-case of what
> the
> if() above checks for? Isn't it more reasonable to require aligned
> 16-bit quantities here? Or if mis-aligned addresses are okay, you
> could
> as well emulate using __cmpxchg_case_4().
Yes, it will be more reasonable. I'll use IS_ALIGNED instead.
>
> Also you shouldn't be casting away volatile (here and below).
> Avoiding
> the casts (by suitable using volatile void * parameter types) would
> likely be best.
Thanks. I'll update the function prototype.
~ Oleksii
On 23.01.2024 11:15, Oleksii wrote:
> On Mon, 2024-01-22 at 17:27 +0100, Jan Beulich wrote:
>> On 22.12.2023 16:12, Oleksii Kurochko wrote:
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory" ); \
>>> + break; \
>>> + case 8: \
>>> + asm volatile( \
>>> + " amoswap.d %0, %2, %1\n" \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory" ); \
>>> + break; \
>>> + default: \
>>> + ASSERT_UNREACHABLE(); \
>>
>> If at all possible this wants to trigger a build failure, not a
>> runtime
>> one.
> I'll update that with BUILD_BUG_ON(size > 8);
What about size accidentally being e.g. 5? I'm also not sure you'll be
able to use BUILD_BUG_ON() here: That'll depend on what use sites there
are. And if all present ones are okay in this regard, you'd still set
out a trap for someone else to fall into later. We have a common
approach for this, which currently is under re-work. See
https://lists.xen.org/archives/html/xen-devel/2024-01/msg01115.html.
>>> + } \
>>> + ret__; \
>>> +})
>>> +
>>> +#define xchg_relaxed(ptr, x) \
>>> +({ \
>>> + __typeof__(*(ptr)) x_ = (x); \
>>> + (__typeof__(*(ptr))) __xchg_relaxed((ptr), x_,
>>> sizeof(*(ptr))); \
>>
>> Nit: Stray blank after cast. For readability I'd also suggest to
>> drop parentheses in cases like the first argument passed to
>> __xchg_relaxed() here.
> Thanks. I'll take that into account.
>
>>
>>> +})
>>
>> For both: What does "relaxed" describe? I'm asking because it's not
>> really clear whether the memory clobbers are actually needed.
>>
>>> +#define __xchg_acquire(ptr, new, size) \
>>> +({ \
>>> + __typeof__(ptr) ptr__ = (ptr); \
>>> + __typeof__(new) new__ = (new); \
>>> + __typeof__(*(ptr)) ret__; \
>>> + switch (size) \
>>> + { \
>>> + case 4: \
>>> + asm volatile( \
>>> + " amoswap.w %0, %2, %1\n" \
>>> + RISCV_ACQUIRE_BARRIER \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory" ); \
>>> + break; \
>>> + case 8: \
>>> + asm volatile( \
>>> + " amoswap.d %0, %2, %1\n" \
>>> + RISCV_ACQUIRE_BARRIER \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory" ); \
>>> + break; \
>>> + default: \
>>> + ASSERT_UNREACHABLE(); \
>>> + } \
>>> + ret__; \
>>> +})
>>
>> If I'm not mistaken this differs from __xchg_relaxed() only in the
>> use
>> of RISCV_ACQUIRE_BARRIER, and ...
>>
>>> +#define xchg_acquire(ptr, x) \
>>> +({ \
>>> + __typeof__(*(ptr)) x_ = (x); \
>>> + (__typeof__(*(ptr))) __xchg_acquire((ptr), x_,
>>> sizeof(*(ptr))); \
>>> +})
>>> +
>>> +#define __xchg_release(ptr, new, size) \
>>> +({ \
>>> + __typeof__(ptr) ptr__ = (ptr); \
>>> + __typeof__(new) new__ = (new); \
>>> + __typeof__(*(ptr)) ret__; \
>>> + switch (size) \
>>> + { \
>>> + case 4: \
>>> + asm volatile ( \
>>> + RISCV_RELEASE_BARRIER \
>>> + " amoswap.w %0, %2, %1\n" \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory"); \
>>> + break; \
>>> + case 8: \
>>> + asm volatile ( \
>>> + RISCV_RELEASE_BARRIER \
>>> + " amoswap.d %0, %2, %1\n" \
>>> + : "=r" (ret__), "+A" (*ptr__) \
>>> + : "r" (new__) \
>>> + : "memory"); \
>>> + break; \
>>> + default: \
>>> + ASSERT_UNREACHABLE(); \
>>> + } \
>>> + ret__; \
>>> +})
>>
>> this only in the use of RISCV_RELEASE_BARRIER. If so they likely want
>> folding, to limit redundancy and make eventual updating easier. (Same
>> for the cmpxchg helper further down, as it seems.)
> Yes, you are right. The difference is only in RISCV_RELEASE_BARRIER, and
> the absence of that barrier is the reason why we have "relaxed" versions.
>
> I am not sure that I understand what you mean by folding here. Do you
> mean that there is no sense in having two separate macros and that only
> one, with RISCV_RELEASE_BARRIER, is needed?
No. You should parameterize the folded common macro for the derived
macros to simply pass in the barriers needed, with empty macro
arguments indicating "this barrier not needed".
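For illustration, the folded macro might look roughly like this (a sketch
only; __xchg_generic and the parameter names are placeholders, while
RISCV_ACQUIRE_BARRIER / RISCV_RELEASE_BARRIER are the string macros from
asm/fence.h). Passing truly empty arguments would work as well; "" is used
here just to keep the intent visible:

#define __xchg_generic(ptr, new, size, release_barrier, acquire_barrier) \
({                                                                  \
    __typeof__(ptr) ptr__ = (ptr);                                  \
    __typeof__(*(ptr)) new__ = (new);                               \
    __typeof__(*(ptr)) ret__;                                       \
    switch ( size )                                                 \
    {                                                               \
    case 4:                                                         \
        asm volatile ( release_barrier                              \
                       "amoswap.w %0, %2, %1\n"                     \
                       acquire_barrier                              \
                       : "=r" (ret__), "+A" (*ptr__)                \
                       : "r" (new__)                                \
                       : "memory" );                                \
        break;                                                      \
    case 8:                                                         \
        asm volatile ( release_barrier                              \
                       "amoswap.d %0, %2, %1\n"                     \
                       acquire_barrier                              \
                       : "=r" (ret__), "+A" (*ptr__)                \
                       : "r" (new__)                                \
                       : "memory" );                                \
        break;                                                      \
    default:                                                        \
        ASSERT_UNREACHABLE();                                       \
    }                                                               \
    ret__;                                                          \
})

/* The three derived forms then only differ in which barriers they pass: */
#define __xchg_relaxed(ptr, new, size) __xchg_generic(ptr, new, size, "", "")
#define __xchg_acquire(ptr, new, size) \
    __xchg_generic(ptr, new, size, "", RISCV_ACQUIRE_BARRIER)
#define __xchg_release(ptr, new, size) \
    __xchg_generic(ptr, new, size, RISCV_RELEASE_BARRIER, "")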
>>> +#define xchg_release(ptr, x) \
>>> +({ \
>>> + __typeof__(*(ptr)) x_ = (x); \
>>> + (__typeof__(*(ptr))) __xchg_release((ptr), x_,
>>> sizeof(*(ptr))); \
>>> +})
>>> +
>>> +static always_inline uint32_t __xchg_case_4(volatile uint32_t
>>> *ptr,
>>> + uint32_t new)
>>> +{
>>> + __typeof__(*(ptr)) ret;
>>> +
>>> + asm volatile (
>>> + " amoswap.w.aqrl %0, %2, %1\n"
>>> + : "=r" (ret), "+A" (*ptr)
>>> + : "r" (new)
>>> + : "memory" );
>>> +
>>> + return ret;
>>> +}
>>> +
>>> +static always_inline uint64_t __xchg_case_8(volatile uint64_t
>>> *ptr,
>>> + uint64_t new)
>>> +{
>>> + __typeof__(*(ptr)) ret;
>>> +
>>> + asm volatile( \
>>> + " amoswap.d.aqrl %0, %2, %1\n" \
>>> + : "=r" (ret), "+A" (*ptr) \
>>> + : "r" (new) \
>>> + : "memory" ); \
>>> +
>>> + return ret;
>>> +}
>>> +
>>> +static always_inline unsigned short __cmpxchg_case_2(volatile
>>> uint32_t *ptr,
>>> + uint32_t old,
>>> + uint32_t
>>> new);
>>
>> Don't you consistently mean uint16_t here (incl the return type) and
>> ...
>>
>>> +static always_inline unsigned short __cmpxchg_case_1(volatile
>>> uint32_t *ptr,
>>> + uint32_t old,
>>> + uint32_t
>>> new);
>>
>> ... uint8_t here?
> The idea was that we emulate __cmpxchg_case_1 and __cmpxchg_case_2
> using 4 bytes cmpxchg and __cmpxchg_case_4 expects uint32_t.
I consider this wrong. The functions would better be type-correct.
>>> +static inline unsigned long __xchg(volatile void *ptr, unsigned
>>> long x, int size)
>>> +{
>>> + switch (size) {
>>> + case 1:
>>> + return __cmpxchg_case_1(ptr, (uint32_t)-1, x);
>>> + case 2:
>>> + return __cmpxchg_case_2(ptr, (uint32_t)-1, x);
>>
>> How are these going to work? You'll compare against ~0, and if the
>> value
>> in memory isn't ~0, memory won't be updated; you will only
>> (correctly)
>> return the value found in memory.
>>
>> Or wait - looking at __cmpxchg_case_{1,2}() far further down, you
>> ignore
>> "old" there. Which apparently means they'll work for the use here,
>> but
>> not for the use in __cmpxchg().
> Yes, the trick is that old is ignored and is read in
> __emulate_cmpxchg_case1_2() before __cmpxchg_case_4 is called:
> do {
> read_val = read_func(aligned_ptr);
> swapped_new = read_val & ~mask;
> swapped_new |= masked_new;
> ret = cmpxchg_func(aligned_ptr, read_val, swapped_new);
> } while ( ret != read_val );
> read_val it is 'old'.
>
> But now I am not 100% sure that it is correct for __cmpxchg...
It just can't be correct - you can't ignore "old" there. I think you
want simple cmpxchg primitives, which xchg then uses in a loop (while
cmpxchg uses them plainly).
>>> +static always_inline unsigned short __cmpxchg_case_2(volatile
>>> uint32_t *ptr,
>>> + uint32_t old,
>>> + uint32_t new)
>>> +{
>>> + (void) old;
>>> +
>>> + if (((unsigned long)ptr & 3) == 3)
>>> + {
>>> +#ifdef CONFIG_64BIT
>>> + return __emulate_cmpxchg_case1_2((uint64_t *)ptr, new,
>>> + readq, __cmpxchg_case_8,
>>> 0xffffU);
>>
>> What if ((unsigned long)ptr & 7) == 7 (which is a sub-case of what
>> the
>> if() above checks for? Isn't it more reasonable to require aligned
>> 16-bit quantities here? Or if mis-aligned addresses are okay, you
>> could
>> as well emulate using __cmpxchg_case_4().
> Yes, it will be more reasonable. I'll use IS_ALIGNED instead.
Not sure I get your use of "instead" here correctly. There's more
to change here than just the if() condition.
Jan
On Tue, 2024-01-23 at 11:28 +0100, Jan Beulich wrote:
> On 23.01.2024 11:15, Oleksii wrote:
> > On Mon, 2024-01-22 at 17:27 +0100, Jan Beulich wrote:
> > > On 22.12.2023 16:12, Oleksii Kurochko wrote:
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory" ); \
> > > > + break; \
> > > > + case 8: \
> > > > + asm volatile( \
> > > > + " amoswap.d %0, %2, %1\n" \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory" ); \
> > > > + break; \
> > > > + default: \
> > > > + ASSERT_UNREACHABLE(); \
> > >
> > > If at all possible this wants to trigger a build failure, not a
> > > runtime
> > > one.
> > I'll update that with BUILD_BUG_ON(size > 8);
>
> What about size accidentally being e.g. 5? I'm also not sure you'll
> be
> able to use BUILD_BUG_ON() here: That'll depend on what use sites
> there
> are. And if all present ones are okay in this regard, you'd still set
> out a trap for someone else to fall into later. We have a common
> approach for this, which currently is under re-work. See
> https://lists.xen.org/archives/html/xen-devel/2024-01/msg01115.html.
Thanks. I'll use that.
>
> > > > + } \
> > > > + ret__; \
> > > > +})
> > > > +
> > > > +#define xchg_relaxed(ptr, x) \
> > > > +({ \
> > > > + __typeof__(*(ptr)) x_ = (x); \
> > > > + (__typeof__(*(ptr))) __xchg_relaxed((ptr), x_,
> > > > sizeof(*(ptr))); \
> > >
> > > Nit: Stray blank after cast. For readability I'd also suggest to
> > > drop parentheses in cases like the first argument passed to
> > > __xchg_relaxed() here.
> > Thanks. I'll take that into account.
> >
> > >
> > > > +})
> > >
> > > For both: What does "relaxed" describe? I'm asking because it's
> > > not
> > > really clear whether the memory clobbers are actually needed.
> > >
> > > > +#define __xchg_acquire(ptr, new, size) \
> > > > +({ \
> > > > + __typeof__(ptr) ptr__ = (ptr); \
> > > > + __typeof__(new) new__ = (new); \
> > > > + __typeof__(*(ptr)) ret__; \
> > > > + switch (size) \
> > > > + { \
> > > > + case 4: \
> > > > + asm volatile( \
> > > > + " amoswap.w %0, %2, %1\n" \
> > > > + RISCV_ACQUIRE_BARRIER \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory" ); \
> > > > + break; \
> > > > + case 8: \
> > > > + asm volatile( \
> > > > + " amoswap.d %0, %2, %1\n" \
> > > > + RISCV_ACQUIRE_BARRIER \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory" ); \
> > > > + break; \
> > > > + default: \
> > > > + ASSERT_UNREACHABLE(); \
> > > > + } \
> > > > + ret__; \
> > > > +})
> > >
> > > If I'm not mistaken this differs from __xchg_relaxed() only in
> > > the
> > > use
> > > of RISCV_ACQUIRE_BARRIER, and ...
> > >
> > > > +#define xchg_acquire(ptr, x) \
> > > > +({ \
> > > > + __typeof__(*(ptr)) x_ = (x); \
> > > > + (__typeof__(*(ptr))) __xchg_acquire((ptr), x_,
> > > > sizeof(*(ptr))); \
> > > > +})
> > > > +
> > > > +#define __xchg_release(ptr, new, size) \
> > > > +({ \
> > > > + __typeof__(ptr) ptr__ = (ptr); \
> > > > + __typeof__(new) new__ = (new); \
> > > > + __typeof__(*(ptr)) ret__; \
> > > > + switch (size) \
> > > > + { \
> > > > + case 4: \
> > > > + asm volatile ( \
> > > > + RISCV_RELEASE_BARRIER \
> > > > + " amoswap.w %0, %2, %1\n" \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory"); \
> > > > + break; \
> > > > + case 8: \
> > > > + asm volatile ( \
> > > > + RISCV_RELEASE_BARRIER \
> > > > + " amoswap.d %0, %2, %1\n" \
> > > > + : "=r" (ret__), "+A" (*ptr__) \
> > > > + : "r" (new__) \
> > > > + : "memory"); \
> > > > + break; \
> > > > + default: \
> > > > + ASSERT_UNREACHABLE(); \
> > > > + } \
> > > > + ret__; \
> > > > +})
> > >
> > > this only in the use of RISCV_RELEASE_BARRIER. If so they likely
> > > want
> > > folding, to limit redundancy and make eventual updating easier.
> > > (Same
> > > for the cmpxchg helper further down, as it seems.)
> > Yes, you are right. The difference is only in RISCV_RELEASE_BARRIER,
> > and the absence of that barrier is the reason why we have "relaxed"
> > versions.
> >
> > I am not sure that I understand what you mean by folding here. Do you
> > mean that there is no sense in having two separate macros and that
> > only one, with RISCV_RELEASE_BARRIER, is needed?
>
> No. You should parameterize the folded common macro for the derived
> macros to simply pass in the barriers needed, with empty macro
> arguments indicating "this barrier not needed".
Now it makes sense to me. Thank you for the explanation.
>
> > > > +#define xchg_release(ptr, x) \
> > > > +({ \
> > > > + __typeof__(*(ptr)) x_ = (x); \
> > > > + (__typeof__(*(ptr))) __xchg_release((ptr), x_,
> > > > sizeof(*(ptr))); \
> > > > +})
> > > > +
> > > > +static always_inline uint32_t __xchg_case_4(volatile uint32_t
> > > > *ptr,
> > > > + uint32_t new)
> > > > +{
> > > > + __typeof__(*(ptr)) ret;
> > > > +
> > > > + asm volatile (
> > > > + " amoswap.w.aqrl %0, %2, %1\n"
> > > > + : "=r" (ret), "+A" (*ptr)
> > > > + : "r" (new)
> > > > + : "memory" );
> > > > +
> > > > + return ret;
> > > > +}
> > > > +
> > > > +static always_inline uint64_t __xchg_case_8(volatile uint64_t
> > > > *ptr,
> > > > + uint64_t new)
> > > > +{
> > > > + __typeof__(*(ptr)) ret;
> > > > +
> > > > + asm volatile( \
> > > > + " amoswap.d.aqrl %0, %2, %1\n" \
> > > > + : "=r" (ret), "+A" (*ptr) \
> > > > + : "r" (new) \
> > > > + : "memory" ); \
> > > > +
> > > > + return ret;
> > > > +}
> > > > +
> > > > +static always_inline unsigned short __cmpxchg_case_2(volatile
> > > > uint32_t *ptr,
> > > > + uint32_t
> > > > old,
> > > > + uint32_t
> > > > new);
> > >
> > > Don't you consistently mean uint16_t here (incl the return type)
> > > and
> > > ...
> > >
> > > > +static always_inline unsigned short __cmpxchg_case_1(volatile
> > > > uint32_t *ptr,
> > > > + uint32_t
> > > > old,
> > > > + uint32_t
> > > > new);
> > >
> > > ... uint8_t here?
> > The idea was that we emulate __cmpxchg_case_1 and __cmpxchg_case_2
> > using 4 bytes cmpxchg and __cmpxchg_case_4 expects uint32_t.
>
> I consider this wrong. The functions would better be type-correct.
>
> > > > +static inline unsigned long __xchg(volatile void *ptr,
> > > > unsigned
> > > > long x, int size)
> > > > +{
> > > > + switch (size) {
> > > > + case 1:
> > > > + return __cmpxchg_case_1(ptr, (uint32_t)-1, x);
> > > > + case 2:
> > > > + return __cmpxchg_case_2(ptr, (uint32_t)-1, x);
> > >
> > > How are these going to work? You'll compare against ~0, and if
> > > the
> > > value
> > > in memory isn't ~0, memory won't be updated; you will only
> > > (correctly)
> > > return the value found in memory.
> > >
> > > Or wait - looking at __cmpxchg_case_{1,2}() far further down, you
> > > ignore
> > > "old" there. Which apparently means they'll work for the use
> > > here,
> > > but
> > > not for the use in __cmpxchg().
> > Yes, the trick is that old is ignored and is read in
> > __emulate_cmpxchg_case1_2() before __cmpxchg_case_4 is called:
> > do {
> >     read_val = read_func(aligned_ptr);
> >     swapped_new = read_val & ~mask;
> >     swapped_new |= masked_new;
> >     ret = cmpxchg_func(aligned_ptr, read_val, swapped_new);
> > } while ( ret != read_val );
> > read_val is 'old'.
> >
> > But now I am not 100% sure that it is correct for __cmpxchg...
>
> It just can't be correct - you can't ignore "old" there. I think you
> want simple cmpxchg primitives, which xchg then uses in a loop (while
> cmpxchg uses them plainly).
But xchg doesn't require 'old' value, so it should be ignored in some
way by cmpxchg.
>
> > > > +static always_inline unsigned short __cmpxchg_case_2(volatile
> > > > uint32_t *ptr,
> > > > + uint32_t
> > > > old,
> > > > + uint32_t
> > > > new)
> > > > +{
> > > > + (void) old;
> > > > +
> > > > + if (((unsigned long)ptr & 3) == 3)
> > > > + {
> > > > +#ifdef CONFIG_64BIT
> > > > + return __emulate_cmpxchg_case1_2((uint64_t *)ptr, new,
> > > > + readq,
> > > > __cmpxchg_case_8,
> > > > 0xffffU);
> > >
> > > What if ((unsigned long)ptr & 7) == 7 (which is a sub-case of
> > > what
> > > the
> > > if() above checks for? Isn't it more reasonable to require
> > > aligned
> > > 16-bit quantities here? Or if mis-aligned addresses are okay, you
> > > could
> > > as well emulate using __cmpxchg_case_4().
> > Yes, it will be more reasonable. I'll use IS_ALIGNED instead.
>
> Not sure I get your use of "instead" here correctly. There's more
> to change here than just the if() condition.
I meant something like:
if ( IS_ALIGNED(ptr, 16) )
__emulate_cmpxchg_case1_2(...);
else
assert_failed("ptr isn't aligned\n");
I have to check whether it is necessary to support mis-aligned addresses
for CAS instructions.
If a mis-aligned address is okay, then it looks like we can always use
__cmpxchg_case_4 without checking whether ptr is aligned or not. Or have
I started to lose something...
~ Oleksii
On 23.01.2024 13:18, Oleksii wrote:
> On Tue, 2024-01-23 at 11:28 +0100, Jan Beulich wrote:
>> On 23.01.2024 11:15, Oleksii wrote:
>>> On Mon, 2024-01-22 at 17:27 +0100, Jan Beulich wrote:
>>>> On 22.12.2023 16:12, Oleksii Kurochko wrote:
>>>>> +static inline unsigned long __xchg(volatile void *ptr,
>>>>> unsigned
>>>>> long x, int size)
>>>>> +{
>>>>> + switch (size) {
>>>>> + case 1:
>>>>> + return __cmpxchg_case_1(ptr, (uint32_t)-1, x);
>>>>> + case 2:
>>>>> + return __cmpxchg_case_2(ptr, (uint32_t)-1, x);
>>>>
>>>> How are these going to work? You'll compare against ~0, and if
>>>> the
>>>> value
>>>> in memory isn't ~0, memory won't be updated; you will only
>>>> (correctly)
>>>> return the value found in memory.
>>>>
>>>> Or wait - looking at __cmpxchg_case_{1,2}() far further down, you
>>>> ignore
>>>> "old" there. Which apparently means they'll work for the use
>>>> here,
>>>> but
>>>> not for the use in __cmpxchg().
>>> Yes, the trick is that old is ignored and is read in
>>> __emulate_cmpxchg_case1_2() before __cmpxchg_case_4 is called:
>>> do {
>>>     read_val = read_func(aligned_ptr);
>>>     swapped_new = read_val & ~mask;
>>>     swapped_new |= masked_new;
>>>     ret = cmpxchg_func(aligned_ptr, read_val, swapped_new);
>>> } while ( ret != read_val );
>>> read_val is 'old'.
>>>
>>> But now I am not 100% sure that it is correct for __cmpxchg...
>>
>> It just can't be correct - you can't ignore "old" there. I think you
>> want simple cmpxchg primitives, which xchg then uses in a loop (while
>> cmpxchg uses them plainly).
> But xchg doesn't require 'old' value, so it should be ignored in some
> way by cmpxchg.
Well, no. If you have only cmpxchg, I think your only choice is - as
said - to read the old value and then loop over cmpxchg until that
succeeds. Not really different from other operations which need
emulating using cmpxchg.
>>>>> +static always_inline unsigned short __cmpxchg_case_2(volatile
>>>>> uint32_t *ptr,
>>>>> + uint32_t
>>>>> old,
>>>>> + uint32_t
>>>>> new)
>>>>> +{
>>>>> + (void) old;
>>>>> +
>>>>> + if (((unsigned long)ptr & 3) == 3)
>>>>> + {
>>>>> +#ifdef CONFIG_64BIT
>>>>> + return __emulate_cmpxchg_case1_2((uint64_t *)ptr, new,
>>>>> + readq,
>>>>> __cmpxchg_case_8,
>>>>> 0xffffU);
>>>>
>>>> What if ((unsigned long)ptr & 7) == 7 (which is a sub-case of
>>>> what
>>>> the
>>>> if() above checks for? Isn't it more reasonable to require
>>>> aligned
>>>> 16-bit quantities here? Or if mis-aligned addresses are okay, you
>>>> could
>>>> as well emulate using __cmpxchg_case_4().
>>> Yes, it will be more reasonable. I'll use IS_ALIGNED instead.
>>
>> Not sure I get your use of "instead" here correctly. There's more
>> to change here than just the if() condition.
> I meant something like:
>
> if ( IS_ALIGNED(ptr, 16) )
> __emulate_cmpxchg_case1_2(...);
> else
> assert_failed("ptr isn't aligned\n");
Except that you'd better not use assert_failed() directly anywhere,
and the above is easier as
ASSERT(IS_ALIGNED(ptr, 16));
__emulate_cmpxchg_case1_2(...);
anyway (leaving aside that I guess you mean 2, not 16).
Jan
On Tue, 2024-01-23 at 14:27 +0100, Jan Beulich wrote:
> On 23.01.2024 13:18, Oleksii wrote:
> > On Tue, 2024-01-23 at 11:28 +0100, Jan Beulich wrote:
> > > On 23.01.2024 11:15, Oleksii wrote:
> > > > On Mon, 2024-01-22 at 17:27 +0100, Jan Beulich wrote:
> > > > > On 22.12.2023 16:12, Oleksii Kurochko wrote:
> > > > > > +static inline unsigned long __xchg(volatile void *ptr,
> > > > > > unsigned
> > > > > > long x, int size)
> > > > > > +{
> > > > > > + switch (size) {
> > > > > > + case 1:
> > > > > > + return __cmpxchg_case_1(ptr, (uint32_t)-1, x);
> > > > > > + case 2:
> > > > > > + return __cmpxchg_case_2(ptr, (uint32_t)-1, x);
> > > > >
> > > > > How are these going to work? You'll compare against ~0, and
> > > > > if
> > > > > the
> > > > > value
> > > > > in memory isn't ~0, memory won't be updated; you will only
> > > > > (correctly)
> > > > > return the value found in memory.
> > > > >
> > > > > Or wait - looking at __cmpxchg_case_{1,2}() far further down,
> > > > > you
> > > > > ignore
> > > > > "old" there. Which apparently means they'll work for the use
> > > > > here,
> > > > > but
> > > > > not for the use in __cmpxchg().
> > > > Yes, the trick is that old is ignored and is read in
> > > > __emulate_cmpxchg_case1_2() before __cmpxchg_case_4 is called:
> > > > do {
> > > >     read_val = read_func(aligned_ptr);
> > > >     swapped_new = read_val & ~mask;
> > > >     swapped_new |= masked_new;
> > > >     ret = cmpxchg_func(aligned_ptr, read_val, swapped_new);
> > > > } while ( ret != read_val );
> > > > read_val is 'old'.
> > > >
> > > > But now I am not 100% sure that it is correct for __cmpxchg...
> > >
> > > It just can't be correct - you can't ignore "old" there. I think
> > > you
> > > want simple cmpxchg primitives, which xchg then uses in a loop
> > > (while
> > > cmpxchg uses them plainly).
> > But xchg doesn't require 'old' value, so it should be ignored in
> > some
> > way by cmpxchg.
>
> Well, no. If you have only cmpxchg, I think your only choice is - as
> said - to read the old value and then loop over cmpxchg until that
> succeeds. Not really different from other operations which need
> emulating using cmpxchg.
Then it looks like the main error in __emulate_cmpxchg_case1_2 is that
I read the value each time, so read_val = read_func(aligned_ptr);
should be before the do {...} while(). Also, it would be better to
rename it to old_val or just old.
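Put together, the direction suggested above might look roughly like this
(a sketch with placeholder names; it assumes the halfword is 2-byte
aligned, i.e. it never straddles a 4-byte boundary, and it reuses
__cmpxchg_case_4 from the patch): a 2-byte cmpxchg that really honours
'old', plus a 2-byte xchg emulated as a loop around it.

static always_inline uint16_t emulate_cmpxchg_2(volatile uint16_t *ptr,
                                                uint16_t old, uint16_t new)
{
    volatile uint32_t *aligned =
        (volatile uint32_t *)((unsigned long)ptr & ~3UL);
    unsigned int shift = ((unsigned long)ptr & 3) * 8;
    uint32_t mask = 0xffffU << shift;
    uint32_t read = *aligned;
    uint32_t expected, swapped;

    do {
        /* Fail, as cmpxchg must, if the halfword no longer holds 'old'. */
        if ( (uint16_t)((read & mask) >> shift) != old )
            return (read & mask) >> shift;
        expected = read;
        swapped = (read & ~mask) | ((uint32_t)new << shift);
        read = __cmpxchg_case_4(aligned, expected, swapped);
    } while ( read != expected );

    return old;
}

static always_inline uint16_t emulate_xchg_2(volatile uint16_t *ptr,
                                             uint16_t new)
{
    uint16_t old = *ptr;
    uint16_t ret;

    /* Retry until the cmpxchg saw exactly the value the swap was based on. */
    while ( (ret = emulate_cmpxchg_2(ptr, old, new)) != old )
        old = ret;

    return ret;
}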
>
> > > > > > +static always_inline unsigned short
> > > > > > __cmpxchg_case_2(volatile
> > > > > > uint32_t *ptr,
> > > > > > +
> > > > > > uint32_t
> > > > > > old,
> > > > > > +
> > > > > > uint32_t
> > > > > > new)
> > > > > > +{
> > > > > > + (void) old;
> > > > > > +
> > > > > > + if (((unsigned long)ptr & 3) == 3)
> > > > > > + {
> > > > > > +#ifdef CONFIG_64BIT
> > > > > > + return __emulate_cmpxchg_case1_2((uint64_t *)ptr,
> > > > > > new,
> > > > > > + readq,
> > > > > > __cmpxchg_case_8,
> > > > > > 0xffffU);
> > > > >
> > > > > What if ((unsigned long)ptr & 7) == 7 (which is a sub-case of
> > > > > what
> > > > > the
> > > > > if() above checks for? Isn't it more reasonable to require
> > > > > aligned
> > > > > 16-bit quantities here? Or if mis-aligned addresses are okay,
> > > > > you
> > > > > could
> > > > > as well emulate using __cmpxchg_case_4().
> > > > Yes, it will be more reasonable. I'll use IS_ALIGNED instead.
> > >
> > > Not sure I get your use of "instead" here correctly. There's more
> > > to change here than just the if() condition.
> > I meant something like:
> >
> > if ( IS_ALIGNED(ptr, 16) )
> > __emulate_cmpxchg_case1_2(...);
> > else
> > assert_failed("ptr isn't aligned\n");
>
> Except that you'd better not use assert_failed() directly anywhere,
> and the above is easier as
>
> ASSERT(IS_ALIGNED(ptr, 16));
> __emulate_cmpxchg_case1_2(...);
>
> anyway (leaving aside that I guess you mean 2, not 16).
Yeah, it should be 2. Thanks.
~ Oleksii