[PATCH 2/2] s390/stackleak: provide fast __stackleak_poison() implementation

Heiko Carstens posted 2 patches 2 years, 10 months ago
[PATCH 2/2] s390/stackleak: provide fast __stackleak_poison() implementation
Posted by Heiko Carstens 2 years, 10 months ago
Provide an s390 specific __stackleak_poison() implementation which is
faster than the generic variant.

For the original implementation with an enforced 4kb stackframe for the
getpid() system call the system call overhead increases by a factor of 3 if
the stackleak feature is enabled. Using the s390 mvc based variant this is
reduced to an increase of 25% instead.

This is within the expected area, since the mvc based implementation is
more or less a memset64() variant which comes with similar results. See
commit 0b77d6701cf8 ("s390: implement memset16, memset32 & memset64").

Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/processor.h | 35 +++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index efffc28cbad8..dc17896a001a 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -118,6 +118,41 @@ unsigned long vdso_size(void);
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
+#define __stackleak_poison __stackleak_poison
+static __always_inline void __stackleak_poison(unsigned long erase_low,
+					       unsigned long erase_high,
+					       unsigned long poison)
+{	/* Fill [erase_low, erase_high) with the 8-byte poison value. */
+	unsigned long tmp, count;
+
+	count = erase_high - erase_low;	/* bytes to poison; NOTE(review): asm assumes 0 or >= 8 - confirm callers */
+	if (!count)
+		return;
+	asm volatile(
+		"	cghi	%[count],8\n"	/* exactly one 8-byte slot? */
+		"	je	2f\n"	/* yes: a single store at 2: is enough */
+		"	aghi	%[count],-(8+1)\n"	/* minus 8 (stg seed) and 1 (mvc length is encoded as len-1) */
+		"	srlg	%[tmp],%[count],8\n"	/* tmp = loop passes, 256 bytes each */
+		"	ltgr	%[tmp],%[tmp]\n"	/* set cc from tmp */
+		"	jz	1f\n"	/* no loop pass needed: tail only */
+		"0:	stg	%[poison],0(%[addr])\n"	/* seed first 8 bytes of the chunk */
+		"	mvc	8(256-8,%[addr]),0(%[addr])\n"	/* overlapping copy replicates the seed over the other 248 bytes */
+		"	la	%[addr],256(%[addr])\n"	/* advance to the next chunk */
+		"	brctg	%[tmp],0b\n"	/* decrement tmp, loop while nonzero */
+		"1:	stg	%[poison],0(%[addr])\n"	/* seed the tail */
+		"	larl	%[tmp],3f\n"	/* tmp = address of the mvc template */
+		"	ex	%[count],0(%[tmp])\n"	/* run template with low 8 bits of count ORed into its length */
+		"	j	4f\n"
+		"2:	stg	%[poison],0(%[addr])\n"	/* 8-byte case: one store */
+		"	j	4f\n"
+		"3:	mvc	8(1,%[addr]),0(%[addr])\n"	/* template, only reached via ex above */
+		"4:\n"
+		: [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp)	/* addr/count modified in place, tmp is scratch */
+		: [poison] "d" (poison)
+		: "memory", "cc"	/* writes memory and changes the condition code */
+		);
+}
+
 /*
  * Thread structure
  */
-- 
2.37.2
Re: [PATCH 2/2] s390/stackleak: provide fast __stackleak_poison() implementation
Posted by Mark Rutland 2 years, 10 months ago
On Wed, Apr 05, 2023 at 03:08:41PM +0200, Heiko Carstens wrote:
> Provide an s390 specific __stackleak_poison() implementation which is
> faster than the generic variant.
> 
> For the original implementation with an enforced 4kb stackframe for the
> getpid() system call the system call overhead increases by a factor of 3 if
> the stackleak feature is enabled. Using the s390 mvc based variant this is
> reduced to an increase of 25% instead.
> 
> This is within the expected area, since the mvc based implementation is
> more or less a memset64() variant which comes with similar results. See
> commit 0b77d6701cf8 ("s390: implement memset16, memset32 & memset64").

With that in mind, could we use memset64() directly (if we made it
noinstr-safe)?

Mark.

> 
> Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
> ---
>  arch/s390/include/asm/processor.h | 35 +++++++++++++++++++++++++++++++
>  1 file changed, 35 insertions(+)
> 
> diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
> index efffc28cbad8..dc17896a001a 100644
> --- a/arch/s390/include/asm/processor.h
> +++ b/arch/s390/include/asm/processor.h
> @@ -118,6 +118,41 @@ unsigned long vdso_size(void);
>  
>  #define HAVE_ARCH_PICK_MMAP_LAYOUT
>  
> +#define __stackleak_poison __stackleak_poison
> +static __always_inline void __stackleak_poison(unsigned long erase_low,
> +					       unsigned long erase_high,
> +					       unsigned long poison)
> +{
> +	unsigned long tmp, count;
> +
> +	count = erase_high - erase_low;
> +	if (!count)
> +		return;
> +	asm volatile(
> +		"	cghi	%[count],8\n"
> +		"	je	2f\n"
> +		"	aghi	%[count],-(8+1)\n"
> +		"	srlg	%[tmp],%[count],8\n"
> +		"	ltgr	%[tmp],%[tmp]\n"
> +		"	jz	1f\n"
> +		"0:	stg	%[poison],0(%[addr])\n"
> +		"	mvc	8(256-8,%[addr]),0(%[addr])\n"
> +		"	la	%[addr],256(%[addr])\n"
> +		"	brctg	%[tmp],0b\n"
> +		"1:	stg	%[poison],0(%[addr])\n"
> +		"	larl	%[tmp],3f\n"
> +		"	ex	%[count],0(%[tmp])\n"
> +		"	j	4f\n"
> +		"2:	stg	%[poison],0(%[addr])\n"
> +		"	j	4f\n"
> +		"3:	mvc	8(1,%[addr]),0(%[addr])\n"
> +		"4:\n"
> +		: [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp)
> +		: [poison] "d" (poison)
> +		: "memory", "cc"
> +		);
> +}
> +
>  /*
>   * Thread structure
>   */
> -- 
> 2.37.2
>