Extend getrandom() vDSO implementation to VDSO64
Tested on QEMU on both ppc64_defconfig and ppc64le_defconfig.
The results are not precise as it is QEMU on an x86 laptop, but
they do not need to be precise to show the benefit.
~ # ./vdso_test_getrandom bench-single
vdso: 25000000 times in 4.977777162 seconds
libc: 25000000 times in 75.516749981 seconds
syscall: 25000000 times in 86.842242014 seconds
~ # ./vdso_test_getrandom bench-single
vdso: 25000000 times in 6.473814156 seconds
libc: 25000000 times in 73.875109463 seconds
syscall: 25000000 times in 71.805066229 seconds
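
The numbers above come from the vdso_test_getrandom selftest's bench-single
mode. As a rough idea of what the "syscall" line measures, a minimal timing
loop could look like the sketch below. This is illustrative only, not the
selftest code; the 256-byte buffer and the loop structure are assumptions.

/* Illustrative sketch of a syscall-path getrandom() benchmark loop. */
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	unsigned char buf[256];
	struct timespec start, end;
	unsigned long i, iterations = 25000000UL;
	double elapsed;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < iterations; i++)
		syscall(SYS_getrandom, buf, sizeof(buf), 0);
	clock_gettime(CLOCK_MONOTONIC, &end);

	elapsed = (end.tv_sec - start.tv_sec) +
		  (end.tv_nsec - start.tv_nsec) / 1e9;
	printf("syscall: %lu times in %.9f seconds\n", iterations, elapsed);

	return 0;
}

The vDSO path avoids the kernel entry/exit on every iteration, which is
where the large difference against the syscall and libc rows comes from.
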
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v5:
- VDSO32 for both PPC32 and PPC64 is in the previous patch. This patch has the logic for VDSO64.
v4:
- Use __BIG_ENDIAN__, which is predefined by GCC, instead of CONFIG_CPU_BIG_ENDIAN, which is unknown to the selftests (see the sketch after this changelog)
- Implement a cleaner/smaller output copy for little endian instead of keeping the compat macro.
v3: New (split out of previous patch)
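
Regarding the v4 note on __BIG_ENDIAN__: the point is that the Kconfig macro
only exists in kernel builds, while the GCC-predefined macro is available
everywhere the same source is compiled. A tiny, illustrative userspace
check (not part of the patch):

#include <stdio.h>

int main(void)
{
	/*
	 * CONFIG_CPU_BIG_ENDIAN only exists when the kernel's Kconfig
	 * headers are included, so it cannot be relied upon when the
	 * selftests rebuild the same source in userspace.
	 */
#ifdef CONFIG_CPU_BIG_ENDIAN
	puts("kernel build, big-endian (Kconfig macro visible)");
#endif
	/* __BIG_ENDIAN__ is predefined by GCC on big-endian powerpc. */
#ifdef __BIG_ENDIAN__
	puts("big-endian target");
#else
	puts("little-endian target");
#endif
	return 0;
}
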
---
arch/powerpc/Kconfig | 2 +-
arch/powerpc/kernel/vdso/Makefile | 8 ++-
arch/powerpc/kernel/vdso/getrandom.S | 8 +++
arch/powerpc/kernel/vdso/vdso64.lds.S | 1 +
arch/powerpc/kernel/vdso/vgetrandom-chacha.S | 53 ++++++++++++++++++++
5 files changed, 69 insertions(+), 3 deletions(-)
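
One note on the vgetrandom-chacha.S hunks below: on ppc64 the two 32-bit
halves of the ChaCha block counter are folded into a single 64-bit register
(rldimi), incremented with a plain addi, and split back with srdi before
being stored. A rough C equivalent, for illustration only (the function
names are made up and not part of the patch):

#include <stdint.h>
#include <stdio.h>

/*
 * counter[0] is the low 32 bits and counter[1] the high 32 bits, matching
 * the lwz of offsets 0 and 4 in __arch_chacha20_blocks_nostack.
 */
static void advance_counter_ppc64(uint32_t counter[2])
{
	/* rldimi: merge the two halves into one 64-bit value */
	uint64_t c = ((uint64_t)counter[1] << 32) | counter[0];

	c += 1;					/* addi */
	counter[0] = (uint32_t)c;		/* low word */
	counter[1] = (uint32_t)(c >> 32);	/* srdi, then stored */
}

static void advance_counter_ppc32(uint32_t counter[2])
{
	uint32_t lo = counter[0] + 1;		/* addic: sets the carry */

	counter[1] += (lo == 0);		/* addze: adds the carry */
	counter[0] = lo;
}

int main(void)
{
	uint32_t a[2] = { 0xffffffff, 0 }, b[2] = { 0xffffffff, 0 };

	advance_counter_ppc64(a);
	advance_counter_ppc32(b);
	printf("%08x%08x %08x%08x\n", a[1], a[0], b[1], b[0]);
	return 0;
}

Doing the increment on one 64-bit register is what lets the ppc64 path drop
the addic/addze carry-propagation pair used on ppc32.
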
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e500a59ddecc..b45452ac4a73 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -311,7 +311,7 @@ config PPC
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
- select VDSO_GETRANDOM if VDSO32
+ select VDSO_GETRANDOM
#
# Please keep this list sorted alphabetically.
#
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index 7a4a935406d8..56fb1633529a 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -9,6 +9,7 @@ obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o not
obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o
obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o
+obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y)
@@ -21,6 +22,7 @@ endif
ifneq ($(c-getrandom-y),)
CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y)
+ CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) $(call cc-option, -ffixed-r30)
endif
# Build rules
@@ -34,7 +36,7 @@ endif
targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o
targets += crtsavres-32.o
obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o
+targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o
obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO
@@ -71,7 +73,7 @@ CPPFLAGS_vdso64.lds += -P -C
# link rule for the .so file, .lds has to be first
$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE
$(call if_changed,vdso32ld_and_check)
-$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE
$(call if_changed,vdso64ld_and_check)
# assembly rules for the .S files
@@ -87,6 +89,8 @@ $(obj-vdso64): %-64.o: %.S FORCE
$(call if_changed_dep,vdso64as)
$(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE
$(call if_changed_dep,cc_o_c)
+$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE
+ $(call if_changed_dep,cc_o_c)
# Generate VDSO offsets using helper script
gen-vdso32sym := $(src)/gen_vdso32_offsets.sh
diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S
index 21773ef3fc1d..a957cd2b2b03 100644
--- a/arch/powerpc/kernel/vdso/getrandom.S
+++ b/arch/powerpc/kernel/vdso/getrandom.S
@@ -27,10 +27,18 @@
.cfi_adjust_cfa_offset PPC_MIN_STKFRM
PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
.cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
+#ifdef __powerpc64__
+ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
+#endif
get_datapage r8
addi r8, r8, VDSO_RNG_DATA_OFFSET
bl CFUNC(DOTSYM(\funct))
PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+#ifdef __powerpc64__
+ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_restore r2
+#endif
cmpwi r3, 0
mtlr r0
addi r1, r1, 2 * PPC_MIN_STKFRM
diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
index 400819258c06..9481e4b892ed 100644
--- a/arch/powerpc/kernel/vdso/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
@@ -123,6 +123,7 @@ VERSION
__kernel_sigtramp_rt64;
__kernel_getcpu;
__kernel_time;
+ __kernel_getrandom;
local: *;
};
diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
index ac85788205cb..7f9061a9e8b4 100644
--- a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
+++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
@@ -124,6 +124,26 @@
*/
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
#ifdef __powerpc64__
+ std counter, -216(r1)
+
+ std r14, -144(r1)
+ std r15, -136(r1)
+ std r16, -128(r1)
+ std r17, -120(r1)
+ std r18, -112(r1)
+ std r19, -104(r1)
+ std r20, -96(r1)
+ std r21, -88(r1)
+ std r22, -80(r1)
+ std r23, -72(r1)
+ std r24, -64(r1)
+ std r25, -56(r1)
+ std r26, -48(r1)
+ std r27, -40(r1)
+ std r28, -32(r1)
+ std r29, -24(r1)
+ std r30, -16(r1)
+ std r31, -8(r1)
#else
stwu r1, -96(r1)
stw counter, 20(r1)
@@ -149,9 +169,13 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
stw r30, 88(r1)
stw r31, 92(r1)
#endif
+#endif /* __powerpc64__ */
lwz counter0, 0(counter)
lwz counter1, 4(counter)
+#ifdef __powerpc64__
+ rldimi counter0, counter1, 32, 0
+#endif
mr idx_r0, nblocks
subi dst_bytes, dst_bytes, 4
@@ -267,12 +291,21 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
subic. idx_r0, idx_r0, 1 /* subi. can't use r0 as source */
+#ifdef __powerpc64__
+ addi counter0, counter0, 1
+ srdi counter1, counter0, 32
+#else
addic counter0, counter0, 1
addze counter1, counter1
+#endif
bne .Lblock
+#ifdef __powerpc64__
+ ld counter, -216(r1)
+#else
lwz counter, 20(r1)
+#endif
stw counter0, 0(counter)
stw counter1, 4(counter)
@@ -284,6 +317,26 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
li r11, 0
li r12, 0
+#ifdef __powerpc64__
+ ld r14, -144(r1)
+ ld r15, -136(r1)
+ ld r16, -128(r1)
+ ld r17, -120(r1)
+ ld r18, -112(r1)
+ ld r19, -104(r1)
+ ld r20, -96(r1)
+ ld r21, -88(r1)
+ ld r22, -80(r1)
+ ld r23, -72(r1)
+ ld r24, -64(r1)
+ ld r25, -56(r1)
+ ld r26, -48(r1)
+ ld r27, -40(r1)
+ ld r28, -32(r1)
+ ld r29, -24(r1)
+ ld r30, -16(r1)
+ ld r31, -8(r1)
+#else
#ifdef __BIG_ENDIAN__
lmw r14, 24(r1)
#else
--
2.44.0
On 9/3/24 12:47 AM, Christophe Leroy wrote:
> Extend getrandom() vDSO implementation to VDSO64
>
> Tested on QEMU on both ppc64_defconfig and ppc64le_defconfig.
>
> The results are not precise as it is QEMU on an x86 laptop, but
> they do not need to be precise to show the benefit.
>
> ~ # ./vdso_test_getrandom bench-single
> vdso: 25000000 times in 4.977777162 seconds
> libc: 25000000 times in 75.516749981 seconds
> syscall: 25000000 times in 86.842242014 seconds
>
> ~ # ./vdso_test_getrandom bench-single
> vdso: 25000000 times in 6.473814156 seconds
> libc: 25000000 times in 73.875109463 seconds
> syscall: 25000000 times in 71.805066229 seconds

Tried the patchset on top of
https://kernel.googlesource.com/pub/scm/linux/kernel/git/crng/random.git
(commit 963233ff013377bc2aa0d641b9efbb7fd4c2b72c (origin/master, origin/HEAD, master))

Results from a Power9 (PowerNV):

# ./vdso_test_getrandom bench-single
vdso: 25000000 times in 0.787943615 seconds
libc: 25000000 times in 14.101887252 seconds
syscall: 25000000 times in 14.047475082 seconds

Impressive, thanks for enabling it.

Tested-by: Madhavan Srinivasan <maddy@linux.ibm.com>