[PATCH] xen: Introduce cmpxchg64() and guest_cmpxchg64()

Julien Grall posted 1 patch 4 years, 3 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/xen tags/patchew/20200815172143.1327-1-julien@xen.org
Maintainers: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>, Jan Beulich <jbeulich@suse.com>, Stefano Stabellini <sstabellini@kernel.org>, Wei Liu <wl@xen.org>, "Roger Pau Monné" <roger.pau@citrix.com>, Andrew Cooper <andrew.cooper3@citrix.com>, Julien Grall <julien@xen.org>
xen/include/asm-arm/arm32/cmpxchg.h | 68 +++++++++++++++++++++++++++++
xen/include/asm-arm/arm64/cmpxchg.h |  5 +++
xen/include/asm-arm/guest_atomics.h | 22 ++++++++++
xen/include/asm-x86/guest_atomics.h |  2 +
xen/include/asm-x86/x86_64/system.h |  2 +
5 files changed, 99 insertions(+)
[PATCH] xen: Introduce cmpxchg64() and guest_cmpxchg64()
Posted by Julien Grall 4 years, 3 months ago
From: Julien Grall <jgrall@amazon.com>

The IOREQ code is using cmpxchg() with 64-bit value. At the moment, this
is x86 code, but there is plan to make it common.

To cater for 32-bit arch, introduce two new helpers to deal with 64-bit
cmpxchg.

The Arm 32-bit implementation of cmpxchg64() is based on the __cmpxchg64
in Linux v5.8 (arch/arm/include/asm/cmpxchg.h).

Signed-off-by: Julien Grall <jgrall@amazon.com>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
---
 xen/include/asm-arm/arm32/cmpxchg.h | 68 +++++++++++++++++++++++++++++
 xen/include/asm-arm/arm64/cmpxchg.h |  5 +++
 xen/include/asm-arm/guest_atomics.h | 22 ++++++++++
 xen/include/asm-x86/guest_atomics.h |  2 +
 xen/include/asm-x86/x86_64/system.h |  2 +
 5 files changed, 99 insertions(+)

diff --git a/xen/include/asm-arm/arm32/cmpxchg.h b/xen/include/asm-arm/arm32/cmpxchg.h
index 0770f272ee99..5e2fa6ee38a0 100644
--- a/xen/include/asm-arm/arm32/cmpxchg.h
+++ b/xen/include/asm-arm/arm32/cmpxchg.h
@@ -87,6 +87,38 @@ __CMPXCHG_CASE(b, 1)
 __CMPXCHG_CASE(h, 2)
 __CMPXCHG_CASE( , 4)
 
+static inline bool __cmpxchg_case_8(volatile uint64_t *ptr,
+			 	    uint64_t *old,
+			 	    uint64_t new,
+			 	    bool timeout,
+				    unsigned int max_try)
+{
+	uint64_t oldval;
+	uint64_t res;
+
+	do {
+		asm volatile(
+		"	ldrexd		%1, %H1, [%3]\n"
+		"	teq		%1, %4\n"
+		"	teqeq		%H1, %H4\n"
+		"	movne		%0, #0\n"
+		"	movne		%H0, #0\n"
+		"	bne		2f\n"
+		"	strexd		%0, %5, %H5, [%3]\n"
+		"	teq		%0, #0\n"
+		"2:"
+		: "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
+		: "r" (ptr), "r" (*old), "r" (new)
+		: "memory", "cc");
+		if (!res)
+			break;
+	} while (!timeout || ((--max_try) > 0));
+
+	*old = oldval;
+
+	return !res;
+}
+
 static always_inline bool __int_cmpxchg(volatile void *ptr, unsigned long *old,
 					unsigned long new, int size,
 					bool timeout, unsigned int max_try)
@@ -156,6 +188,30 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
 	return ret;
 }
 
+/*
+ * The helper may fail to update the memory if the action takes too long.
+ *
+ * @old: On call the value pointed contains the expected old value. It will be
+ * updated to the actual old value.
+ * @max_try: Maximum number of iterations
+ *
+ * The helper will return true when the update has succeeded (i.e no
+ * timeout) and false if the update has failed.
+ */
+static always_inline bool __cmpxchg64_mb_timeout(volatile uint64_t *ptr,
+						 uint64_t *old,
+						 uint64_t new,
+						 unsigned int max_try)
+{
+	bool ret;
+
+	smp_mb();
+	ret = __cmpxchg_case_8(ptr, old, new, true, max_try);
+	smp_mb();
+
+	return ret;
+}
+
 #define cmpxchg(ptr,o,n)						\
 	((__typeof__(*(ptr)))__cmpxchg_mb((ptr),			\
 					  (unsigned long)(o),		\
@@ -167,6 +223,18 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
 				       (unsigned long)(o),		\
 				       (unsigned long)(n),		\
 				       sizeof(*(ptr))))
+
+static inline uint64_t cmpxchg64(volatile uint64_t *ptr,
+				 uint64_t old,
+				 uint64_t new)
+{
+	smp_mb();
+	if (!__cmpxchg_case_8(ptr, &old, new, false, 0))
+		ASSERT_UNREACHABLE();
+
+	return old;
+}
+
 #endif
 /*
  * Local variables:
diff --git a/xen/include/asm-arm/arm64/cmpxchg.h b/xen/include/asm-arm/arm64/cmpxchg.h
index fc5c60f0bd74..de9cd0ee2b07 100644
--- a/xen/include/asm-arm/arm64/cmpxchg.h
+++ b/xen/include/asm-arm/arm64/cmpxchg.h
@@ -187,6 +187,11 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
 	__ret; \
 })
 
+#define cmpxchg64(ptr, o, n) cmpxchg(ptr, o, n)
+
+#define __cmpxchg64_mb_timeout(ptr, old, new, max_try) \
+	__cmpxchg_mb_timeout(ptr, old, new, 8, max_try)
+
 #endif
 /*
  * Local variables:
diff --git a/xen/include/asm-arm/guest_atomics.h b/xen/include/asm-arm/guest_atomics.h
index af27cc627bf3..28ce402bea79 100644
--- a/xen/include/asm-arm/guest_atomics.h
+++ b/xen/include/asm-arm/guest_atomics.h
@@ -115,6 +115,28 @@ static inline unsigned long __guest_cmpxchg(struct domain *d,
                                          (unsigned long)(n),\
                                          sizeof (*(ptr))))
 
+static inline uint64_t guest_cmpxchg64(struct domain *d,
+                                       volatile uint64_t *ptr,
+                                       uint64_t old,
+                                       uint64_t new)
+{
+    uint64_t oldval = old;
+
+    perfc_incr(atomics_guest);
+
+    if ( __cmpxchg64_mb_timeout(ptr, &oldval, new,
+                                this_cpu(guest_safe_atomic_max)) )
+        return oldval;
+
+    perfc_incr(atomics_guest_paused);
+
+    domain_pause_nosync(d);
+    oldval = cmpxchg64(ptr, old, new);
+    domain_unpause(d);
+
+    return oldval;
+}
+
 #endif /* _ARM_GUEST_ATOMICS_H */
 /*
  * Local variables:
diff --git a/xen/include/asm-x86/guest_atomics.h b/xen/include/asm-x86/guest_atomics.h
index 029417c8ffc1..f4de9d3631ff 100644
--- a/xen/include/asm-x86/guest_atomics.h
+++ b/xen/include/asm-x86/guest_atomics.h
@@ -20,6 +20,8 @@
     ((void)(d), test_and_change_bit(nr, p))
 
 #define guest_cmpxchg(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
+#define guest_cmpxchg64(d, ptr, o, n) ((void)(d), cmpxchg64(ptr, o, n))
+
 
 #endif /* _X86_GUEST_ATOMICS_H */
 /*
diff --git a/xen/include/asm-x86/x86_64/system.h b/xen/include/asm-x86/x86_64/system.h
index f471859c19cc..c1b16105e9f2 100644
--- a/xen/include/asm-x86/x86_64/system.h
+++ b/xen/include/asm-x86/x86_64/system.h
@@ -5,6 +5,8 @@
     ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),            \
                                    (unsigned long)(n),sizeof(*(ptr))))
 
+#define cmpxchg64(ptr, o, n) cmpxchg(ptr, o, n)
+
 /*
  * Atomic 16 bytes compare and exchange.  Compare OLD with MEM, if
  * identical, store NEW in MEM.  Return the initial value in MEM.
-- 
2.17.1


Re: [PATCH] xen: Introduce cmpxchg64() and guest_cmpxchg64()
Posted by Oleksandr 4 years, 3 months ago
On 15.08.20 20:21, Julien Grall wrote:

Hi Julien

> From: Julien Grall <jgrall@amazon.com>
>
> The IOREQ code is using cmpxchg() with 64-bit value. At the moment, this
> is x86 code, but there is plan to make it common.
>
> To cater 32-bit arch, introduce two new helpers to deal with 64-bit
> cmpxchg.
>
> The Arm 32-bit implementation of cmpxchg64() is based on the __cmpxchg64
> in Linux v5.8 (arch/arm/include/asm/cmpxchg.h).
>
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
> ---
>   xen/include/asm-arm/arm32/cmpxchg.h | 68 +++++++++++++++++++++++++++++
>   xen/include/asm-arm/arm64/cmpxchg.h |  5 +++
>   xen/include/asm-arm/guest_atomics.h | 22 ++++++++++
>   xen/include/asm-x86/guest_atomics.h |  2 +
>   xen/include/asm-x86/x86_64/system.h |  2 +
>   5 files changed, 99 insertions(+)

Thank you for doing this. I have successfully build-tested your patch 
with IOREQ series on Arm32/Arm64 (of course before that I had changed 
cmpxchg to guest_cmpxchg64 in hvm_send_buffered_ioreq()).


-- 
Regards,

Oleksandr Tyshchenko


Re: [PATCH] xen: Introduce cmpxchg64() and guest_cmpxchg64()
Posted by Stefano Stabellini 4 years, 3 months ago
On Sat, 15 Aug 2020, Julien Grall wrote:
> From: Julien Grall <jgrall@amazon.com>
> 
> The IOREQ code is using cmpxchg() with 64-bit value. At the moment, this
> is x86 code, but there is plan to make it common.
> 
> To cater 32-bit arch, introduce two new helpers to deal with 64-bit
> cmpxchg.
> 
> The Arm 32-bit implementation of cmpxchg64() is based on the __cmpxchg64
> in Linux v5.8 (arch/arm/include/asm/cmpxchg.h).
> 
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
> ---
>  xen/include/asm-arm/arm32/cmpxchg.h | 68 +++++++++++++++++++++++++++++
>  xen/include/asm-arm/arm64/cmpxchg.h |  5 +++
>  xen/include/asm-arm/guest_atomics.h | 22 ++++++++++
>  xen/include/asm-x86/guest_atomics.h |  2 +
>  xen/include/asm-x86/x86_64/system.h |  2 +
>  5 files changed, 99 insertions(+)
> 
> diff --git a/xen/include/asm-arm/arm32/cmpxchg.h b/xen/include/asm-arm/arm32/cmpxchg.h
> index 0770f272ee99..5e2fa6ee38a0 100644
> --- a/xen/include/asm-arm/arm32/cmpxchg.h
> +++ b/xen/include/asm-arm/arm32/cmpxchg.h
> @@ -87,6 +87,38 @@ __CMPXCHG_CASE(b, 1)
>  __CMPXCHG_CASE(h, 2)
>  __CMPXCHG_CASE( , 4)
>  
> +static inline bool __cmpxchg_case_8(volatile uint64_t *ptr,
> +			 	    uint64_t *old,
> +			 	    uint64_t new,
> +			 	    bool timeout,
> +				    unsigned int max_try)
> +{
> +	uint64_t oldval;
> +	uint64_t res;
> +
> +	do {
> +		asm volatile(
> +		"	ldrexd		%1, %H1, [%3]\n"
> +		"	teq		%1, %4\n"
> +		"	teqeq		%H1, %H4\n"
> +		"	movne		%0, #0\n"
> +		"	movne		%H0, #0\n"
> +		"	bne		2f\n"
> +		"	strexd		%0, %5, %H5, [%3]\n"
> +		"	teq		%0, #0\n"

Apologies if I am misreading this code, but this last "teq" instruction
doesn't seem to be useful?


> +		"2:"
> +		: "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
                                              ^ not used ?


> +		: "r" (ptr), "r" (*old), "r" (new)
> +		: "memory", "cc");
> +		if (!res)
> +			break;
> +	} while (!timeout || ((--max_try) > 0));
> +
> +	*old = oldval;
> +
> +	return !res;
> +}
> +
>  static always_inline bool __int_cmpxchg(volatile void *ptr, unsigned long *old,
>  					unsigned long new, int size,
>  					bool timeout, unsigned int max_try)
> @@ -156,6 +188,30 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
>  	return ret;
>  }
>  
> +/*
> + * The helper may fail to update the memory if the action takes too long.
> + *
> + * @old: On call the value pointed contains the expected old value. It will be
> + * updated to the actual old value.
> + * @max_try: Maximum number of iterations
> + *
> + * The helper will return true when the update has succeeded (i.e no
> + * timeout) and false if the update has failed.
> + */
> +static always_inline bool __cmpxchg64_mb_timeout(volatile uint64_t *ptr,
> +						 uint64_t *old,
> +						 uint64_t new,
> +						 unsigned int max_try)
> +{
> +	bool ret;
> +
> +	smp_mb();
> +	ret = __cmpxchg_case_8(ptr, old, new, true, max_try);
> +	smp_mb();
> +
> +	return ret;
> +}
> +
>  #define cmpxchg(ptr,o,n)						\
>  	((__typeof__(*(ptr)))__cmpxchg_mb((ptr),			\
>  					  (unsigned long)(o),		\
> @@ -167,6 +223,18 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
>  				       (unsigned long)(o),		\
>  				       (unsigned long)(n),		\
>  				       sizeof(*(ptr))))
> +
> +static inline uint64_t cmpxchg64(volatile uint64_t *ptr,
> +				 uint64_t old,
> +				 uint64_t new)
> +{
> +	smp_mb();

While looking at the existing code, I noticed that we don't have a
corresponding smp_mb(); in this position. Is it needed here because of
the 64bit-ness?


> +	if (!__cmpxchg_case_8(ptr, &old, new, false, 0))
> +		ASSERT_UNREACHABLE();
> +
> +	return old;
> +}
> +
>  #endif
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-arm/arm64/cmpxchg.h b/xen/include/asm-arm/arm64/cmpxchg.h
> index fc5c60f0bd74..de9cd0ee2b07 100644
> --- a/xen/include/asm-arm/arm64/cmpxchg.h
> +++ b/xen/include/asm-arm/arm64/cmpxchg.h
> @@ -187,6 +187,11 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
>  	__ret; \
>  })
>  
> +#define cmpxchg64(ptr, o, n) cmpxchg(ptr, o, n)
> +
> +#define __cmpxchg64_mb_timeout(ptr, old, new, max_try) \
> +	__cmpxchg_mb_timeout(ptr, old, new, 8, max_try)
> +
>  #endif
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-arm/guest_atomics.h b/xen/include/asm-arm/guest_atomics.h
> index af27cc627bf3..28ce402bea79 100644
> --- a/xen/include/asm-arm/guest_atomics.h
> +++ b/xen/include/asm-arm/guest_atomics.h
> @@ -115,6 +115,28 @@ static inline unsigned long __guest_cmpxchg(struct domain *d,
>                                           (unsigned long)(n),\
>                                           sizeof (*(ptr))))
>  
> +static inline uint64_t guest_cmpxchg64(struct domain *d,
> +                                       volatile uint64_t *ptr,
> +                                       uint64_t old,
> +                                       uint64_t new)
> +{
> +    uint64_t oldval = old;
> +
> +    perfc_incr(atomics_guest);
> +
> +    if ( __cmpxchg64_mb_timeout(ptr, &oldval, new,
> +                                this_cpu(guest_safe_atomic_max)) )
> +        return oldval;
> +
> +    perfc_incr(atomics_guest_paused);
> +
> +    domain_pause_nosync(d);
> +    oldval = cmpxchg64(ptr, old, new);
> +    domain_unpause(d);
> +
> +    return oldval;
> +}
> +
>  #endif /* _ARM_GUEST_ATOMICS_H */
>  /*
>   * Local variables:
> diff --git a/xen/include/asm-x86/guest_atomics.h b/xen/include/asm-x86/guest_atomics.h
> index 029417c8ffc1..f4de9d3631ff 100644
> --- a/xen/include/asm-x86/guest_atomics.h
> +++ b/xen/include/asm-x86/guest_atomics.h
> @@ -20,6 +20,8 @@
>      ((void)(d), test_and_change_bit(nr, p))
>  
>  #define guest_cmpxchg(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
> +#define guest_cmpxchg64(d, ptr, o, n) ((void)(d), cmpxchg64(ptr, o, n))
> +
>  
>  #endif /* _X86_GUEST_ATOMICS_H */
>  /*
> diff --git a/xen/include/asm-x86/x86_64/system.h b/xen/include/asm-x86/x86_64/system.h
> index f471859c19cc..c1b16105e9f2 100644
> --- a/xen/include/asm-x86/x86_64/system.h
> +++ b/xen/include/asm-x86/x86_64/system.h
> @@ -5,6 +5,8 @@
>      ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),            \
>                                     (unsigned long)(n),sizeof(*(ptr))))
>  
> +#define cmpxchg64(ptr, o, n) cmpxchg(ptr, o, n)
> +
>  /*
>   * Atomic 16 bytes compare and exchange.  Compare OLD with MEM, if
>   * identical, store NEW in MEM.  Return the initial value in MEM.
> -- 
> 2.17.1
> 

Re: [PATCH] xen: Introduce cmpxchg64() and guest_cmpxchg64()
Posted by Julien Grall 4 years, 3 months ago
Hi Stefano,

On 17/08/2020 23:56, Stefano Stabellini wrote:
> On Sat, 15 Aug 2020, Julien Grall wrote:
>> From: Julien Grall <jgrall@amazon.com>
>>
>> The IOREQ code is using cmpxchg() with 64-bit value. At the moment, this
>> is x86 code, but there is plan to make it common.
>>
>> To cater 32-bit arch, introduce two new helpers to deal with 64-bit
>> cmpxchg.
>>
>> The Arm 32-bit implementation of cmpxchg64() is based on the __cmpxchg64
>> in Linux v5.8 (arch/arm/include/asm/cmpxchg.h).
>>
>> Signed-off-by: Julien Grall <jgrall@amazon.com>
>> Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>> ---
>>   xen/include/asm-arm/arm32/cmpxchg.h | 68 +++++++++++++++++++++++++++++
>>   xen/include/asm-arm/arm64/cmpxchg.h |  5 +++
>>   xen/include/asm-arm/guest_atomics.h | 22 ++++++++++
>>   xen/include/asm-x86/guest_atomics.h |  2 +
>>   xen/include/asm-x86/x86_64/system.h |  2 +
>>   5 files changed, 99 insertions(+)
>>
>> diff --git a/xen/include/asm-arm/arm32/cmpxchg.h b/xen/include/asm-arm/arm32/cmpxchg.h
>> index 0770f272ee99..5e2fa6ee38a0 100644
>> --- a/xen/include/asm-arm/arm32/cmpxchg.h
>> +++ b/xen/include/asm-arm/arm32/cmpxchg.h
>> @@ -87,6 +87,38 @@ __CMPXCHG_CASE(b, 1)
>>   __CMPXCHG_CASE(h, 2)
>>   __CMPXCHG_CASE( , 4)
>>   
>> +static inline bool __cmpxchg_case_8(volatile uint64_t *ptr,
>> +			 	    uint64_t *old,
>> +			 	    uint64_t new,
>> +			 	    bool timeout,
>> +				    unsigned int max_try)
>> +{
>> +	uint64_t oldval;
>> +	uint64_t res;
>> +
>> +	do {
>> +		asm volatile(
>> +		"	ldrexd		%1, %H1, [%3]\n"
>> +		"	teq		%1, %4\n"
>> +		"	teqeq		%H1, %H4\n"
>> +		"	movne		%0, #0\n"
>> +		"	movne		%H0, #0\n"
>> +		"	bne		2f\n"
>> +		"	strexd		%0, %5, %H5, [%3]\n"
>> +		"	teq		%0, #0\n"
> 
> Apologies if I am misreading this code, but this last "teq" instruction
> doesn't seem to be useful?

Urgh, I forgot to remove it. The Linux version is looping in assembly 
but I had to convert to C in order to cater for the timeout.

I will remove it in the next version.

> 
> 
>> +		"2:"
>> +		: "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
>                                                ^ not used ?
> 
> 
>> +		: "r" (ptr), "r" (*old), "r" (new)
>> +		: "memory", "cc");
>> +		if (!res)
>> +			break;
>> +	} while (!timeout || ((--max_try) > 0));
>> +
>> +	*old = oldval;
>> +
>> +	return !res;
>> +}
>> +
>>   static always_inline bool __int_cmpxchg(volatile void *ptr, unsigned long *old,
>>   					unsigned long new, int size,
>>   					bool timeout, unsigned int max_try)
>> @@ -156,6 +188,30 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
>>   	return ret;
>>   }
>>   
>> +/*
>> + * The helper may fail to update the memory if the action takes too long.
>> + *
>> + * @old: On call the value pointed contains the expected old value. It will be
>> + * updated to the actual old value.
>> + * @max_try: Maximum number of iterations
>> + *
>> + * The helper will return true when the update has succeeded (i.e no
>> + * timeout) and false if the update has failed.
>> + */
>> +static always_inline bool __cmpxchg64_mb_timeout(volatile uint64_t *ptr,
>> +						 uint64_t *old,
>> +						 uint64_t new,
>> +						 unsigned int max_try)
>> +{
>> +	bool ret;
>> +
>> +	smp_mb();
>> +	ret = __cmpxchg_case_8(ptr, old, new, true, max_try);
>> +	smp_mb();
>> +
>> +	return ret;
>> +}
>> +
>>   #define cmpxchg(ptr,o,n)						\
>>   	((__typeof__(*(ptr)))__cmpxchg_mb((ptr),			\
>>   					  (unsigned long)(o),		\
>> @@ -167,6 +223,18 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
>>   				       (unsigned long)(o),		\
>>   				       (unsigned long)(n),		\
>>   				       sizeof(*(ptr))))
>> +
>> +static inline uint64_t cmpxchg64(volatile uint64_t *ptr,
>> +				 uint64_t old,
>> +				 uint64_t new)
>> +{
>> +	smp_mb();
> 
> I was looking at the existing code I noticed that we don't have a
> corresponding smp_mb(); in this position. Is it needed here because of
> the 64bit-ness?

We have barriers also in the existing code. The code can be a bit confusing 
because __cmpxchg() refers to a local cmpxchg.

In our case, the corresponding version is __cmpxchg_mb().

To be honest, the existing naming is a bit confusing. I am thinking to 
drop cmpxchg_local() completely as this is not used. This would also 
make the code easier to read. What do you think?


> 
> 
>> +	if (!__cmpxchg_case_8(ptr, &old, new, false, 0))
>> +		ASSERT_UNREACHABLE();

And I forgot the smp_mb() here :(.

>> +
>> +	return old;
>> +}
>> +
>>   #endif
>>   /*
>>    * Local variables:
>> diff --git a/xen/include/asm-arm/arm64/cmpxchg.h b/xen/include/asm-arm/arm64/cmpxchg.h
>> index fc5c60f0bd74..de9cd0ee2b07 100644
>> --- a/xen/include/asm-arm/arm64/cmpxchg.h
>> +++ b/xen/include/asm-arm/arm64/cmpxchg.h
>> @@ -187,6 +187,11 @@ static always_inline bool __cmpxchg_mb_timeout(volatile void *ptr,
>>   	__ret; \
>>   })
>>   
>> +#define cmpxchg64(ptr, o, n) cmpxchg(ptr, o, n)
>> +
>> +#define __cmpxchg64_mb_timeout(ptr, old, new, max_try) \
>> +	__cmpxchg_mb_timeout(ptr, old, new, 8, max_try)
>> +
>>   #endif
>>   /*
>>    * Local variables:
>> diff --git a/xen/include/asm-arm/guest_atomics.h b/xen/include/asm-arm/guest_atomics.h
>> index af27cc627bf3..28ce402bea79 100644
>> --- a/xen/include/asm-arm/guest_atomics.h
>> +++ b/xen/include/asm-arm/guest_atomics.h
>> @@ -115,6 +115,28 @@ static inline unsigned long __guest_cmpxchg(struct domain *d,
>>                                            (unsigned long)(n),\
>>                                            sizeof (*(ptr))))
>>   
>> +static inline uint64_t guest_cmpxchg64(struct domain *d,
>> +                                       volatile uint64_t *ptr,
>> +                                       uint64_t old,
>> +                                       uint64_t new)
>> +{
>> +    uint64_t oldval = old;
>> +
>> +    perfc_incr(atomics_guest);
>> +
>> +    if ( __cmpxchg64_mb_timeout(ptr, &oldval, new,
>> +                                this_cpu(guest_safe_atomic_max)) )
>> +        return oldval;
>> +
>> +    perfc_incr(atomics_guest_paused);
>> +
>> +    domain_pause_nosync(d);
>> +    oldval = cmpxchg64(ptr, old, new);
>> +    domain_unpause(d);
>> +
>> +    return oldval;
>> +}
>> +
>>   #endif /* _ARM_GUEST_ATOMICS_H */
>>   /*
>>    * Local variables:
>> diff --git a/xen/include/asm-x86/guest_atomics.h b/xen/include/asm-x86/guest_atomics.h
>> index 029417c8ffc1..f4de9d3631ff 100644
>> --- a/xen/include/asm-x86/guest_atomics.h
>> +++ b/xen/include/asm-x86/guest_atomics.h
>> @@ -20,6 +20,8 @@
>>       ((void)(d), test_and_change_bit(nr, p))
>>   
>>   #define guest_cmpxchg(d, ptr, o, n) ((void)(d), cmpxchg(ptr, o, n))
>> +#define guest_cmpxchg64(d, ptr, o, n) ((void)(d), cmpxchg64(ptr, o, n))
>> +
>>   
>>   #endif /* _X86_GUEST_ATOMICS_H */
>>   /*
>> diff --git a/xen/include/asm-x86/x86_64/system.h b/xen/include/asm-x86/x86_64/system.h
>> index f471859c19cc..c1b16105e9f2 100644
>> --- a/xen/include/asm-x86/x86_64/system.h
>> +++ b/xen/include/asm-x86/x86_64/system.h
>> @@ -5,6 +5,8 @@
>>       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),            \
>>                                      (unsigned long)(n),sizeof(*(ptr))))
>>   
>> +#define cmpxchg64(ptr, o, n) cmpxchg(ptr, o, n)
>> +
>>   /*
>>    * Atomic 16 bytes compare and exchange.  Compare OLD with MEM, if
>>    * identical, store NEW in MEM.  Return the initial value in MEM.
>> -- 
>> 2.17.1
>>

Cheers,

-- 
Julien Grall