[PATCH 1/6] arm64: Provide dcache_by_myline_op_nosync helper

Barry Song posted 6 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH 1/6] arm64: Provide dcache_by_myline_op_nosync helper
Posted by Barry Song 1 month, 3 weeks ago
From: Barry Song <v-songbaohua@oppo.com>

dcache_by_myline_op ensures completion of the data cache operations for a
region, while dcache_by_myline_op_nosync only issues them without waiting.
This enables deferred synchronization so completion for multiple regions
can be handled together later.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ada Couprie Diaz <ada.coupriediaz@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tangquan Zheng <zhengtangquan@oppo.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
---
 arch/arm64/include/asm/assembler.h | 79 ++++++++++++++++++++++--------
 1 file changed, 59 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index f0ca7196f6fa..7d84a9ca7880 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -366,22 +366,7 @@ alternative_else
 alternative_endif
 	.endm
 
-/*
- * Macro to perform a data cache maintenance for the interval
- * [start, end) with dcache line size explicitly provided.
- *
- * 	op:		operation passed to dc instruction
- * 	domain:		domain used in dsb instruction
- * 	start:          starting virtual address of the region
- * 	end:            end virtual address of the region
- *	linesz:		dcache line size
- * 	fixup:		optional label to branch to on user fault
- * 	Corrupts:       start, end, tmp
- */
-	.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
-	sub	\tmp, \linesz, #1
-	bic	\start, \start, \tmp
-.Ldcache_op\@:
+	.macro __dcache_op_line op, start
 	.ifc	\op, cvau
 	__dcache_op_workaround_clean_cache \op, \start
 	.else
@@ -399,14 +384,54 @@ alternative_endif
 	.endif
 	.endif
 	.endif
-	add	\start, \start, \linesz
-	cmp	\start, \end
-	b.lo	.Ldcache_op\@
-	dsb	\domain
+	.endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [start, end) with dcache line size explicitly provided.
+ *
+ * 	op:		operation passed to dc instruction
+ * 	domain:		domain used in dsb instruction
+ * 	start:          starting virtual address of the region
+ * 	end:            end virtual address of the region
+ *	linesz:		dcache line size
+ * 	fixup:		optional label to branch to on user fault
+ * 	Corrupts:       start, end, tmp
+ */
+	.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
+	sub	\tmp, \linesz, #1
+	bic	\start, \start, \tmp
+.Ldcache_op\@:
+	__dcache_op_line \op, \start
+	add	\start, \start, \linesz
+	cmp	\start, \end
+	b.lo	.Ldcache_op\@
 
+	dsb	\domain
 	_cond_uaccess_extable .Ldcache_op\@, \fixup
 	.endm
 
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [start, end) with dcache line size explicitly provided.
+ * It won't wait for the completion of the dc operation.
+ *
+ * 	op:		operation passed to dc instruction
+ * 	start:          starting virtual address of the region
+ * 	end:            end virtual address of the region
+ *	linesz:		dcache line size
+ * 	Corrupts:       start, end, tmp
+ */
+	.macro dcache_by_myline_op_nosync op, start, end, linesz, tmp
+	sub	\tmp, \linesz, #1
+	bic	\start, \start, \tmp
+.Ldcache_op\@:
+	__dcache_op_line \op, \start
+	add	\start, \start, \linesz
+	cmp	\start, \end
+	b.lo	.Ldcache_op\@
+	.endm
+
 /*
  * Macro to perform a data cache maintenance for the interval
  * [start, end)
@@ -423,6 +448,20 @@ alternative_endif
 	dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
 	.endm
 
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [start, end). It won't wait for the dc operation to complete.
+ *
+ * 	op:		operation passed to dc instruction
+ * 	start:          starting virtual address of the region
+ * 	end:            end virtual address of the region
+ * 	Corrupts:       start, end, tmp1, tmp2
+ */
+	.macro dcache_by_line_op_nosync op, start, end, tmp1, tmp2
+	dcache_line_size \tmp1, \tmp2
+	dcache_by_myline_op_nosync \op, \start, \end, \tmp1, \tmp2
+	.endm
+
 /*
  * Macro to perform an instruction cache maintenance for the interval
  * [start, end)
-- 
2.39.3 (Apple Git-146)

Re: [PATCH 1/6] arm64: Provide dcache_by_myline_op_nosync helper
Posted by Robin Murphy 1 month, 3 weeks ago
On 2025-12-19 5:36 am, Barry Song wrote:
> From: Barry Song <v-songbaohua@oppo.com>
> 
> dcache_by_myline_op ensures completion of the data cache operations for a
> region, while dcache_by_myline_op_nosync only issues them without waiting.
> This enables deferred synchronization so completion for multiple regions
> can be handled together later.

This is a super-low-level internal macro with only two users... Frankly I'd
just do as below.

Thanks,
Robin.

----->8-----

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index f0ca7196f6fa..26e983c331c5 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -367,18 +367,17 @@ alternative_endif
  	.endm
  
  /*
- * Macro to perform a data cache maintenance for the interval
- * [start, end) with dcache line size explicitly provided.
+ * Main loop for a data cache maintenance operation. Caller to provide the
+ * dcache line size and take care of relevant synchronisation afterwards.
   *
   * 	op:		operation passed to dc instruction
- * 	domain:		domain used in dsb instruction
   * 	start:          starting virtual address of the region
   * 	end:            end virtual address of the region
   *	linesz:		dcache line size
   * 	fixup:		optional label to branch to on user fault
   * 	Corrupts:       start, end, tmp
   */
-	.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
+	.macro raw_dcache_by_line_op op, start, end, linesz, tmp, fixup
  	sub	\tmp, \linesz, #1
  	bic	\start, \start, \tmp
  .Ldcache_op\@:
@@ -402,7 +401,6 @@ alternative_endif
  	add	\start, \start, \linesz
  	cmp	\start, \end
  	b.lo	.Ldcache_op\@
-	dsb	\domain
  
  	_cond_uaccess_extable .Ldcache_op\@, \fixup
  	.endm
@@ -420,7 +418,8 @@ alternative_endif
   */
  	.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
  	dcache_line_size \tmp1, \tmp2
-	dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
+	raw_dcache_by_line_op \op, \start, \end, \tmp1, \tmp2, \fixup
+	dsb \domain
  	.endm
  
  /*
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 413f899e4ac6..efdb6884058e 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -64,7 +64,8 @@ SYM_CODE_START(arm64_relocate_new_kernel)
  	mov	x19, x13
  	copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
  	add	x1, x19, #PAGE_SIZE
-	dcache_by_myline_op civac, sy, x19, x1, x15, x20
+	raw_dcache_by_line_op civac, x19, x1, x15, x20
+	dsb	sy
  	b	.Lnext
  .Ltest_indirection:
  	tbz	x16, IND_INDIRECTION_BIT, .Ltest_destination
Re: [PATCH 1/6] arm64: Provide dcache_by_myline_op_nosync helper
Posted by Barry Song 1 month, 2 weeks ago
On Fri, Dec 19, 2025 at 8:20 PM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2025-12-19 5:36 am, Barry Song wrote:
> > From: Barry Song <v-songbaohua@oppo.com>
> >
> > dcache_by_myline_op ensures completion of the data cache operations for a
> > region, while dcache_by_myline_op_nosync only issues them without waiting.
> > This enables deferred synchronization so completion for multiple regions
> > can be handled together later.
>
> This is a super-low-level internal macro with only two users... Frankly I'd
> just do as below.
>
> Thanks,
> Robin.
>
> ----->8-----
>
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index f0ca7196f6fa..26e983c331c5 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -367,18 +367,17 @@ alternative_endif
>         .endm
>
>   /*
> - * Macro to perform a data cache maintenance for the interval
> - * [start, end) with dcache line size explicitly provided.
> + * Main loop for a data cache maintenance operation. Caller to provide the
> + * dcache line size and take care of relevant synchronisation afterwards.
>    *
>    *    op:             operation passed to dc instruction
> - *     domain:         domain used in dsb instruction
>    *    start:          starting virtual address of the region
>    *    end:            end virtual address of the region
>    *    linesz:         dcache line size
>    *    fixup:          optional label to branch to on user fault
>    *    Corrupts:       start, end, tmp
>    */
> -       .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
> +       .macro raw_dcache_by_line_op op, start, end, linesz, tmp, fixup
>         sub     \tmp, \linesz, #1
>         bic     \start, \start, \tmp
>   .Ldcache_op\@:
> @@ -402,7 +401,6 @@ alternative_endif
>         add     \start, \start, \linesz
>         cmp     \start, \end
>         b.lo    .Ldcache_op\@
> -       dsb     \domain
>
>         _cond_uaccess_extable .Ldcache_op\@, \fixup
>         .endm
> @@ -420,7 +418,8 @@ alternative_endif
>    */
>         .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
>         dcache_line_size \tmp1, \tmp2
> -       dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
> +       raw_dcache_by_line_op \op, \start, \end, \tmp1, \tmp2, \fixup
> +       dsb \domain
>         .endm
>
>   /*
> diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> index 413f899e4ac6..efdb6884058e 100644
> --- a/arch/arm64/kernel/relocate_kernel.S
> +++ b/arch/arm64/kernel/relocate_kernel.S
> @@ -64,7 +64,8 @@ SYM_CODE_START(arm64_relocate_new_kernel)
>         mov     x19, x13
>         copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
>         add     x1, x19, #PAGE_SIZE
> -       dcache_by_myline_op civac, sy, x19, x1, x15, x20
> +       raw_dcache_by_line_op civac, x19, x1, x15, x20
> +       dsb     sy
>         b       .Lnext
>   .Ltest_indirection:
>         tbz     x16, IND_INDIRECTION_BIT, .Ltest_destination
>

Thanks, Robin.  Really much better!
dcache_by_line_op_nosync could be:

/*
 * Macro to perform a data cache maintenance for the interval
 * [start, end) without waiting for completion
 *
 *      op:             operation passed to dc instruction
 *      start:          starting virtual address of the region
 *      end:            end virtual address of the region
 *      fixup:          optional label to branch to on user fault
 *      Corrupts:       start, end, tmp1, tmp2
 */
        .macro dcache_by_line_op_nosync op, start, end, tmp1, tmp2, fixup
        dcache_line_size \tmp1, \tmp2
        raw_dcache_by_line_op \op, \start, \end, \tmp1, \tmp2, \fixup
        .endm

Thanks
Barry