arch/riscv/kernel/kexec_relocate.S | 52 +++++++++++++----------------- 1 file changed, 23 insertions(+), 29 deletions(-)
For readability and simplicity, cleanup the riscv_kexec_relocate code:
- Re-sort the first 4 `mv` instructions against `riscv_kexec_method()`
- Eliminate registers for debugging (s9,s10,s11) and storing const-value (s5,s6)
- Replace `jalr` with `jr` for no-link jump
I tested this on Qemu virt machine and works as it was.
Signed-off-by: Song Shuai <songshuaishuai@tinylab.org>
---
arch/riscv/kernel/kexec_relocate.S | 52 +++++++++++++-----------------
1 file changed, 23 insertions(+), 29 deletions(-)
diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
index 059c5e216ae7..de0a4b35d01e 100644
--- a/arch/riscv/kernel/kexec_relocate.S
+++ b/arch/riscv/kernel/kexec_relocate.S
@@ -17,27 +17,17 @@ SYM_CODE_START(riscv_kexec_relocate)
* s1: (const) Phys address to jump to after relocation
* s2: (const) Phys address of the FDT image
* s3: (const) The hartid of the current hart
- * s4: Pointer to the destination address for the relocation
- * s5: (const) Number of words per page
- * s6: (const) 1, used for subtraction
- * s7: (const) kernel_map.va_pa_offset, used when switching MMU off
- * s8: (const) Physical address of the main loop
- * s9: (debug) indirection page counter
- * s10: (debug) entry counter
- * s11: (debug) copied words counter
+ * s4: (const) kernel_map.va_pa_offset, used when switching MMU off
+ * s5: Pointer to the destination address for the relocation
+ * s6: (const) Physical address of the main loop
*/
mv s0, a0
mv s1, a1
mv s2, a2
mv s3, a3
- mv s4, zero
- li s5, (PAGE_SIZE / RISCV_SZPTR)
- li s6, 1
- mv s7, a4
- mv s8, zero
- mv s9, zero
- mv s10, zero
- mv s11, zero
+ mv s4, a4
+ mv s5, zero
+ mv s6, zero
/* Disable / cleanup interrupts */
csrw CSR_SIE, zero
@@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate)
* the start of the loop below so that we jump there in
* any case.
*/
- la s8, 1f
- sub s8, s8, s7
- csrw CSR_STVEC, s8
+ la s6, 1f
+ sub s6, s6, s4
+ csrw CSR_STVEC, s6
+
+ /*
+ * With C-extension, here we get 42 Bytes and the next
+ * .align directive would pad zeros here up to 44 Bytes.
+ * So manually put a nop here to avoid zeros padding.
+ */
+ nop
/* Process entries in a loop */
.align 2
1:
- addi s10, s10, 1
REG_L t0, 0(s0) /* t0 = *image->entry */
addi s0, s0, RISCV_SZPTR /* image->entry++ */
/* IND_DESTINATION entry ? -> save destination address */
andi t1, t0, 0x1
beqz t1, 2f
- andi s4, t0, ~0x1
+ andi s5, t0, ~0x1
j 1b
2:
@@ -74,9 +70,8 @@ SYM_CODE_START(riscv_kexec_relocate)
andi t1, t0, 0x2
beqz t1, 2f
andi s0, t0, ~0x2
- addi s9, s9, 1
csrw CSR_SATP, zero
- jalr zero, s8, 0
+ jr s6
2:
/* IND_DONE entry ? -> jump to done label */
@@ -92,14 +87,13 @@ SYM_CODE_START(riscv_kexec_relocate)
andi t1, t0, 0x8
beqz t1, 1b /* Unknown entry type, ignore it */
andi t0, t0, ~0x8
- mv t3, s5 /* i = num words per page */
+ li t3, (PAGE_SIZE / RISCV_SZPTR) /* i = num words per page */
3: /* copy loop */
REG_L t1, (t0) /* t1 = *src_ptr */
- REG_S t1, (s4) /* *dst_ptr = *src_ptr */
+ REG_S t1, (s5) /* *dst_ptr = *src_ptr */
addi t0, t0, RISCV_SZPTR /* stc_ptr++ */
- addi s4, s4, RISCV_SZPTR /* dst_ptr++ */
- sub t3, t3, s6 /* i-- */
- addi s11, s11, 1 /* c++ */
+ addi s5, s5, RISCV_SZPTR /* dst_ptr++ */
+ addi t3, t3, -0x1 /* i-- */
beqz t3, 1b /* copy done ? */
j 3b
@@ -146,7 +140,7 @@ SYM_CODE_START(riscv_kexec_relocate)
*/
fence.i
- jalr zero, a2, 0
+ jr a2
SYM_CODE_END(riscv_kexec_relocate)
riscv_kexec_relocate_end:
--
2.20.1
On Aug 30 2023, Song Shuai wrote: > @@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate) > * the start of the loop below so that we jump there in > * any case. > */ > - la s8, 1f > - sub s8, s8, s7 > - csrw CSR_STVEC, s8 > + la s6, 1f > + sub s6, s6, s4 > + csrw CSR_STVEC, s6 > + > + /* > + * With C-extension, here we get 42 Bytes and the next > + * .align directive would pad zeros here up to 44 Bytes. > + * So manually put a nop here to avoid zeros padding. > + */ > + nop > > /* Process entries in a loop */ > .align 2 While you are at it, I'd suggest being explicit about .palign vs. .balign. -- Andreas Schwab, SUSE Labs, schwab@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something completely different."
Hi, Andreas: 在 2023/8/30 15:24, Andreas Schwab 写道: > On Aug 30 2023, Song Shuai wrote: > >> @@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate) >> * the start of the loop below so that we jump there in >> * any case. >> */ >> - la s8, 1f >> - sub s8, s8, s7 >> - csrw CSR_STVEC, s8 >> + la s6, 1f >> + sub s6, s6, s4 >> + csrw CSR_STVEC, s6 >> + >> + /* >> + * With C-extension, here we get 42 Bytes and the next >> + * .align directive would pad zeros here up to 44 Bytes. >> + * So manually put a nop here to avoid zeros padding. >> + */ >> + nop >> >> /* Process entries in a loop */ >> .align 2 > > While you are at it, I'd suggest being explicit about .palign > vs. .balign. > How about this commemt: Due to the stvec.BASE 4-byte alignment constraint, the following .align (alias of .p2align) directive will align the next instruction to a 4-byte boundary by padding zeros for this .rodata section. With C-extension, here we get 42 Bytes and would be padded with zeros up to 44 Bytes. So manually put a nop here to avoid it. -- Thanks Song Shuai
On Aug 31 2023, Song Shuai wrote: > Hi, Andreas: > > 在 2023/8/30 15:24, Andreas Schwab 写道: >> On Aug 30 2023, Song Shuai wrote: >> >>> @@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate) >>> * the start of the loop below so that we jump there in >>> * any case. >>> */ >>> - la s8, 1f >>> - sub s8, s8, s7 >>> - csrw CSR_STVEC, s8 >>> + la s6, 1f >>> + sub s6, s6, s4 >>> + csrw CSR_STVEC, s6 >>> + >>> + /* >>> + * With C-extension, here we get 42 Bytes and the next >>> + * .align directive would pad zeros here up to 44 Bytes. >>> + * So manually put a nop here to avoid zeros padding. >>> + */ >>> + nop >>> /* Process entries in a loop */ >>> .align 2 >> While you are at it, I'd suggest being explicit about .palign >> vs. .balign. >> > How about this commemt: My suggestion is to change .align 2 in the last line to .palign 2 as part of the cleanup. The meaning of .align is target dependent, and someone not familiar with RISC-V may misinterpret it. -- Andreas Schwab, SUSE Labs, schwab@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 "And now for something completely different."
© 2016 - 2025 Red Hat, Inc.