Compilers estimate the size of an asm() block for inlining purposes.
Constructs such as ALTERNATIVE appear large due to the metadata, despite often
only being a handful of instructions. asm inline() overrides the estimation
to identify the block as being small.
This has a substantial impact on inlining decisions, expected to be for the
better given that the compiler has a more accurate picture to work with.
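For illustration (not part of the patch; the function and section names below
are invented for the example), the qualifier only changes the inliner's size
estimate, not the generated code.  Both functions below contain the same asm()
body, but only the second is costed as a single small statement:

    /*
     * Sketch only: the .pushsection payload stands in for the metadata that
     * ALTERNATIVE emits, and makes the first asm() look large to the
     * inliner.  The "inline" qualifier (spelled __inline via asm_inline in
     * xen/include/xen/compiler.h below) overrides that estimate.
     */
    static inline void nop_costed_large(void)
    {
        asm volatile ( "nop\n\t"
                       ".pushsection .discard.demo, \"a\"\n\t"
                       ".quad 0, 0, 0, 0\n\t"
                       ".popsection" ::: "memory" );
    }

    static inline void nop_costed_small(void)
    {
        asm inline volatile ( "nop\n\t"
                              ".pushsection .discard.demo, \"a\"\n\t"
                              ".quad 0, 0, 0, 0\n\t"
                              ".popsection" ::: "memory" );
    }
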
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>
---
xen/Kconfig | 4 +++
xen/arch/arm/include/asm/alternative.h | 4 +--
xen/arch/arm/include/asm/arm64/flushtlb.h | 4 +--
xen/arch/arm/include/asm/arm64/io.h | 43 ++++++++++++++---------
xen/arch/arm/include/asm/cpuerrata.h | 8 ++---
xen/arch/arm/include/asm/cpufeature.h | 8 ++---
xen/arch/arm/include/asm/page.h | 12 ++++---
xen/arch/arm/include/asm/processor.h | 7 ++--
xen/arch/arm/include/asm/sysregs.h | 10 +++---
xen/arch/arm/mmu/p2m.c | 3 +-
xen/arch/x86/include/asm/alternative.h | 36 ++++++++++---------
xen/include/xen/compiler.h | 14 ++++++++
12 files changed, 95 insertions(+), 58 deletions(-)
diff --git a/xen/Kconfig b/xen/Kconfig
index ae1c401a981e..ab4ab42ae987 100644
--- a/xen/Kconfig
+++ b/xen/Kconfig
@@ -29,6 +29,10 @@ config LD_IS_GNU
config LD_IS_LLVM
def_bool $(success,$(LD) --version | head -n 1 | grep -q "^LLD")
+config CC_HAS_ASM_INLINE
+ # GCC >= 9, Clang >= 11
+ def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
+
# Use -f{function,data}-sections compiler parameters
config CC_SPLIT_SECTIONS
bool
diff --git a/xen/arch/arm/include/asm/alternative.h b/xen/arch/arm/include/asm/alternative.h
index 22477d9497a3..1563f03a0f5a 100644
--- a/xen/arch/arm/include/asm/alternative.h
+++ b/xen/arch/arm/include/asm/alternative.h
@@ -209,9 +209,9 @@ alternative_endif
#endif /* __ASSEMBLY__ */
/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ * Usage: asm_inline (ALTERNATIVE(oldinstr, newinstr, feature));
*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * Usage: asm_inline (ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
* N.B. If CONFIG_FOO is specified, but not selected, the whole block
* will be omitted, including oldinstr.
*/
diff --git a/xen/arch/arm/include/asm/arm64/flushtlb.h b/xen/arch/arm/include/asm/arm64/flushtlb.h
index 45642201d147..3b99c11b50d1 100644
--- a/xen/arch/arm/include/asm/arm64/flushtlb.h
+++ b/xen/arch/arm/include/asm/arm64/flushtlb.h
@@ -31,7 +31,7 @@
#define TLB_HELPER(name, tlbop, sh) \
static inline void name(void) \
{ \
- asm volatile( \
+ asm_inline volatile ( \
"dsb " # sh "st;" \
"tlbi " # tlbop ";" \
ALTERNATIVE( \
@@ -55,7 +55,7 @@ static inline void name(void) \
#define TLB_HELPER_VA(name, tlbop) \
static inline void name(vaddr_t va) \
{ \
- asm volatile( \
+ asm_inline volatile ( \
"tlbi " # tlbop ", %0;" \
ALTERNATIVE( \
"nop; nop;", \
diff --git a/xen/arch/arm/include/asm/arm64/io.h b/xen/arch/arm/include/asm/arm64/io.h
index 7d5959877759..ac90b729c44d 100644
--- a/xen/arch/arm/include/asm/arm64/io.h
+++ b/xen/arch/arm/include/asm/arm64/io.h
@@ -51,40 +51,51 @@ static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
static inline u8 __raw_readb(const volatile void __iomem *addr)
{
u8 val;
- asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
- "ldarb %w0, [%1]",
- ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
- : "=r" (val) : "r" (addr));
+
+ asm_inline volatile (
+ ALTERNATIVE("ldrb %w0, [%1]",
+ "ldarb %w0, [%1]",
+ ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+ : "=r" (val) : "r" (addr) );
+
return val;
}
static inline u16 __raw_readw(const volatile void __iomem *addr)
{
u16 val;
- asm volatile(ALTERNATIVE("ldrh %w0, [%1]",
- "ldarh %w0, [%1]",
- ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
- : "=r" (val) : "r" (addr));
+ asm_inline volatile (
+ ALTERNATIVE("ldrh %w0, [%1]",
+ "ldarh %w0, [%1]",
+ ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+ : "=r" (val) : "r" (addr) );
+
return val;
}
static inline u32 __raw_readl(const volatile void __iomem *addr)
{
u32 val;
- asm volatile(ALTERNATIVE("ldr %w0, [%1]",
- "ldar %w0, [%1]",
- ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
- : "=r" (val) : "r" (addr));
+
+ asm_inline volatile (
+ ALTERNATIVE("ldr %w0, [%1]",
+ "ldar %w0, [%1]",
+ ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+ : "=r" (val) : "r" (addr) );
+
return val;
}
static inline u64 __raw_readq(const volatile void __iomem *addr)
{
u64 val;
- asm volatile(ALTERNATIVE("ldr %0, [%1]",
- "ldar %0, [%1]",
- ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
- : "=r" (val) : "r" (addr));
+
+ asm_inline volatile (
+ ALTERNATIVE("ldr %0, [%1]",
+ "ldar %0, [%1]",
+ ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+ : "=r" (val) : "r" (addr) );
+
return val;
}
diff --git a/xen/arch/arm/include/asm/cpuerrata.h b/xen/arch/arm/include/asm/cpuerrata.h
index 8d7e7b9375bd..1799a16d7e7f 100644
--- a/xen/arch/arm/include/asm/cpuerrata.h
+++ b/xen/arch/arm/include/asm/cpuerrata.h
@@ -16,10 +16,10 @@ static inline bool check_workaround_##erratum(void) \
{ \
register_t ret; \
\
- asm volatile (ALTERNATIVE("mov %0, #0", \
- "mov %0, #1", \
- feature) \
- : "=r" (ret)); \
+ asm_inline volatile ( \
+ ALTERNATIVE("mov %0, #0", \
+ "mov %0, #1", feature) \
+ : "=r" (ret) ); \
\
return unlikely(ret); \
} \
diff --git a/xen/arch/arm/include/asm/cpufeature.h b/xen/arch/arm/include/asm/cpufeature.h
index 50297e53d90e..b6df18801166 100644
--- a/xen/arch/arm/include/asm/cpufeature.h
+++ b/xen/arch/arm/include/asm/cpufeature.h
@@ -102,10 +102,10 @@ static inline bool cpus_have_cap(unsigned int num)
#define cpus_have_const_cap(num) ({ \
register_t __ret; \
\
- asm volatile (ALTERNATIVE("mov %0, #0", \
- "mov %0, #1", \
- num) \
- : "=r" (__ret)); \
+ asm_inline volatile ( \
+ ALTERNATIVE("mov %0, #0", \
+ "mov %0, #1", num) \
+ : "=r" (__ret) ); \
\
unlikely(__ret); \
})
diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h
index 69f817d1e68a..27bc96b9f401 100644
--- a/xen/arch/arm/include/asm/page.h
+++ b/xen/arch/arm/include/asm/page.h
@@ -176,7 +176,8 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
{
size -= dcache_line_bytes - ((uintptr_t)p & cacheline_mask);
p = (void *)((uintptr_t)p & ~cacheline_mask);
- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
+ asm_inline volatile (
+ __clean_and_invalidate_dcache_one(0) :: "r" (p) );
p += dcache_line_bytes;
}
@@ -185,7 +186,8 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
asm volatile (__invalidate_dcache_one(0) : : "r" (p + idx));
if ( size > 0 )
- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx));
+ asm_inline volatile (
+ __clean_and_invalidate_dcache_one(0) :: "r" (p + idx) );
dsb(sy); /* So we know the flushes happen before continuing */
@@ -209,7 +211,7 @@ static inline int clean_dcache_va_range(const void *p, unsigned long size)
p = (void *)((uintptr_t)p & ~cacheline_mask);
for ( ; size >= dcache_line_bytes;
idx += dcache_line_bytes, size -= dcache_line_bytes )
- asm volatile (__clean_dcache_one(0) : : "r" (p + idx));
+ asm_inline volatile ( __clean_dcache_one(0) : : "r" (p + idx) );
dsb(sy); /* So we know the flushes happen before continuing */
/* ARM callers assume that dcache_* functions cannot fail. */
return 0;
@@ -247,7 +249,7 @@ static inline int clean_and_invalidate_dcache_va_range
if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \
clean_dcache_va_range(_p, sizeof(x)); \
else \
- asm volatile ( \
+ asm_inline volatile ( \
"dsb sy;" /* Finish all earlier writes */ \
__clean_dcache_one(0) \
"dsb sy;" /* Finish flush before continuing */ \
@@ -259,7 +261,7 @@ static inline int clean_and_invalidate_dcache_va_range
if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \
clean_and_invalidate_dcache_va_range(_p, sizeof(x)); \
else \
- asm volatile ( \
+ asm_inline volatile ( \
"dsb sy;" /* Finish all earlier writes */ \
__clean_and_invalidate_dcache_one(0) \
"dsb sy;" /* Finish flush before continuing */ \
diff --git a/xen/arch/arm/include/asm/processor.h b/xen/arch/arm/include/asm/processor.h
index 60b587db697f..9cbc4f911061 100644
--- a/xen/arch/arm/include/asm/processor.h
+++ b/xen/arch/arm/include/asm/processor.h
@@ -607,9 +607,10 @@ register_t get_default_cptr_flags(void);
#define SYNCHRONIZE_SERROR(feat) \
do { \
ASSERT(local_abort_is_enabled()); \
- asm volatile(ALTERNATIVE("dsb sy; isb", \
- "nop; nop", feat) \
- : : : "memory"); \
+ asm_inline volatile ( \
+ ALTERNATIVE("dsb sy; isb", \
+ "nop; nop", feat) \
+ ::: "memory" ); \
} while (0)
/*
diff --git a/xen/arch/arm/include/asm/sysregs.h b/xen/arch/arm/include/asm/sysregs.h
index 61e30c9e517c..5c2d362be3d8 100644
--- a/xen/arch/arm/include/asm/sysregs.h
+++ b/xen/arch/arm/include/asm/sysregs.h
@@ -22,11 +22,13 @@ static inline register_t read_sysreg_par(void)
* DMB SY before and after accessing it, as part of the workaround for the
* errata 1508412.
*/
- asm volatile(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
- CONFIG_ARM64_ERRATUM_1508412));
+ asm_inline volatile (
+ ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
+ CONFIG_ARM64_ERRATUM_1508412) );
par_el1 = READ_SYSREG64(PAR_EL1);
- asm volatile(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
- CONFIG_ARM64_ERRATUM_1508412));
+ asm_inline volatile (
+ ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
+ CONFIG_ARM64_ERRATUM_1508412) );
return par_el1;
}
diff --git a/xen/arch/arm/mmu/p2m.c b/xen/arch/arm/mmu/p2m.c
index 7642dbc7c55b..d96078f547d5 100644
--- a/xen/arch/arm/mmu/p2m.c
+++ b/xen/arch/arm/mmu/p2m.c
@@ -228,7 +228,8 @@ void p2m_restore_state(struct vcpu *n)
* registers associated to EL1/EL0 translations regime have been
* synchronized.
*/
- asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));
+ asm_inline volatile (
+ ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE) );
WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
index 38472fb58e2d..4c8be51d0e23 100644
--- a/xen/arch/x86/include/asm/alternative.h
+++ b/xen/arch/x86/include/asm/alternative.h
@@ -116,12 +116,13 @@ extern void alternative_branches(void);
* without volatile and memory clobber.
*/
#define alternative(oldinstr, newinstr, feature) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+ asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
+ ::: "memory" )
#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
- asm volatile (ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
- newinstr2, feature2) \
- : : : "memory")
+ asm_inline volatile ( ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
+ newinstr2, feature2) \
+ ::: "memory" )
/*
* Alternative inline assembly with input.
@@ -133,14 +134,14 @@ extern void alternative_branches(void);
* If you use variable sized constraints like "m" or "g" in the
* replacement make sure to pad to the worst case length.
*/
-#define alternative_input(oldinstr, newinstr, feature, input...) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
- : : input)
+#define alternative_input(oldinstr, newinstr, feature, input...) \
+ asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
+ :: input )
/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
- : output : input)
+#define alternative_io(oldinstr, newinstr, feature, output, input...) \
+ asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
+ : output : input )
/*
* This is similar to alternative_io. But it has two features and
@@ -150,11 +151,11 @@ extern void alternative_branches(void);
* Otherwise, if CPU has feature1, newinstr1 is used.
* Otherwise, oldinstr is used.
*/
-#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2, \
- feature2, output, input...) \
- asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
- newinstr2, feature2) \
- : output : input)
+#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2, \
+ feature2, output, input...) \
+ asm_inline volatile ( ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
+ newinstr2, feature2) \
+ : output : input )
/* Use this macro(s) if you need more than one output parameter. */
#define ASM_OUTPUT2(a...) a
@@ -234,8 +235,9 @@ extern void alternative_branches(void);
rettype ret_; \
register unsigned long r10_ asm("r10"); \
register unsigned long r11_ asm("r11"); \
- asm volatile (ALTERNATIVE("call *%c[addr](%%rip)", "call .", \
- X86_FEATURE_ALWAYS) \
+ asm_inline volatile ( \
+ ALTERNATIVE("call *%c[addr](%%rip)", "call .", \
+ X86_FEATURE_ALWAYS) \
: ALT_CALL ## n ## _OUT, "=a" (ret_), \
"=r" (r10_), "=r" (r11_) ASM_CALL_CONSTRAINT \
: [addr] "i" (&(func)), "g" (func) \
diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
index c68fab189154..d9b133016bb6 100644
--- a/xen/include/xen/compiler.h
+++ b/xen/include/xen/compiler.h
@@ -53,6 +53,20 @@
#define unreachable() __builtin_unreachable()
#endif
+/*
+ * Compilers estimate the size of an asm() block for inlining purposes.
+ * Constructs such as ALTERNATIVE appear large due to the metadata, despite
+ * often only being a handful of instructions. asm inline() overrides the
+ * estimation to identify the block as being small.
+ *
+ * Note: __inline is needed to avoid getting caught up in INIT_SECTIONS_ONLY.
+ */
+#if CONFIG_CC_HAS_ASM_INLINE
+# define asm_inline asm __inline
+#else
+# define asm_inline asm
+#endif
+
/*
* Add the pseudo keyword 'fallthrough' so case statement blocks
* must end with any of these keywords:
--
2.39.5
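
For reference, the CC_HAS_ASM_INLINE probe above amounts to checking that the
toolchain accepts the new qualifier at all.  A standalone equivalent of the
probed translation unit (illustrative only; Kconfig pipes the same code
through $(CC) and keys the option off the compile succeeding) is:

    /* Compiles only when the compiler understands the "inline" asm
     * qualifier (GCC >= 9, Clang >= 11), mirroring CC_HAS_ASM_INLINE. */
    void foo(void)
    {
        asm inline ("");
    }
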
On Tue, 22 Apr 2025, Andrew Cooper wrote:
> Compilers estimate the size of an asm() block for inlining purposes.
>
> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
> only being a handful of instructions. asm inline() overrides the estimation
> to identify the block as being small.
>
> This has a substantial impact on inlining decisions, expected to be for the
> better given that the compiler has a more accurate picture to work with.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Hi Andrew,
If we are going to use asm_inline, please add a note to
docs/misra/C-language-toolchain.rst where we keep record of all the
language extensions we use.
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Stefano Stabellini <sstabellini@kernel.org>
> CC: Julien Grall <julien@xen.org>
> CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> CC: Bertrand Marquis <bertrand.marquis@arm.com>
> CC: Michal Orzel <michal.orzel@amd.com>
> ---
> xen/Kconfig | 4 +++
> xen/arch/arm/include/asm/alternative.h | 4 +--
> xen/arch/arm/include/asm/arm64/flushtlb.h | 4 +--
> xen/arch/arm/include/asm/arm64/io.h | 43 ++++++++++++++---------
> xen/arch/arm/include/asm/cpuerrata.h | 8 ++---
> xen/arch/arm/include/asm/cpufeature.h | 8 ++---
> xen/arch/arm/include/asm/page.h | 12 ++++---
> xen/arch/arm/include/asm/processor.h | 7 ++--
> xen/arch/arm/include/asm/sysregs.h | 10 +++---
> xen/arch/arm/mmu/p2m.c | 3 +-
> xen/arch/x86/include/asm/alternative.h | 36 ++++++++++---------
> xen/include/xen/compiler.h | 14 ++++++++
> 12 files changed, 95 insertions(+), 58 deletions(-)
>
> diff --git a/xen/Kconfig b/xen/Kconfig
> index ae1c401a981e..ab4ab42ae987 100644
> --- a/xen/Kconfig
> +++ b/xen/Kconfig
> @@ -29,6 +29,10 @@ config LD_IS_GNU
> config LD_IS_LLVM
> def_bool $(success,$(LD) --version | head -n 1 | grep -q "^LLD")
>
> +config CC_HAS_ASM_INLINE
> + # GCC >= 9, Clang >= 11
> + def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
> +
> # Use -f{function,data}-sections compiler parameters
> config CC_SPLIT_SECTIONS
> bool
> diff --git a/xen/arch/arm/include/asm/alternative.h b/xen/arch/arm/include/asm/alternative.h
> index 22477d9497a3..1563f03a0f5a 100644
> --- a/xen/arch/arm/include/asm/alternative.h
> +++ b/xen/arch/arm/include/asm/alternative.h
> @@ -209,9 +209,9 @@ alternative_endif
> #endif /* __ASSEMBLY__ */
>
> /*
> - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
> + * Usage: asm_inline (ALTERNATIVE(oldinstr, newinstr, feature));
> *
> - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
> + * Usage: asm_inline (ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
> * N.B. If CONFIG_FOO is specified, but not selected, the whole block
> * will be omitted, including oldinstr.
> */
> diff --git a/xen/arch/arm/include/asm/arm64/flushtlb.h b/xen/arch/arm/include/asm/arm64/flushtlb.h
> index 45642201d147..3b99c11b50d1 100644
> --- a/xen/arch/arm/include/asm/arm64/flushtlb.h
> +++ b/xen/arch/arm/include/asm/arm64/flushtlb.h
> @@ -31,7 +31,7 @@
> #define TLB_HELPER(name, tlbop, sh) \
> static inline void name(void) \
> { \
> - asm volatile( \
> + asm_inline volatile ( \
> "dsb " # sh "st;" \
> "tlbi " # tlbop ";" \
> ALTERNATIVE( \
> @@ -55,7 +55,7 @@ static inline void name(void) \
> #define TLB_HELPER_VA(name, tlbop) \
> static inline void name(vaddr_t va) \
> { \
> - asm volatile( \
> + asm_inline volatile ( \
> "tlbi " # tlbop ", %0;" \
> ALTERNATIVE( \
> "nop; nop;", \
> diff --git a/xen/arch/arm/include/asm/arm64/io.h b/xen/arch/arm/include/asm/arm64/io.h
> index 7d5959877759..ac90b729c44d 100644
> --- a/xen/arch/arm/include/asm/arm64/io.h
> +++ b/xen/arch/arm/include/asm/arm64/io.h
> @@ -51,40 +51,51 @@ static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
> static inline u8 __raw_readb(const volatile void __iomem *addr)
> {
> u8 val;
> - asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
> - "ldarb %w0, [%1]",
> - ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> - : "=r" (val) : "r" (addr));
> +
> + asm_inline volatile (
> + ALTERNATIVE("ldrb %w0, [%1]",
> + "ldarb %w0, [%1]",
> + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> + : "=r" (val) : "r" (addr) );
> +
> return val;
> }
>
> static inline u16 __raw_readw(const volatile void __iomem *addr)
> {
> u16 val;
> - asm volatile(ALTERNATIVE("ldrh %w0, [%1]",
> - "ldarh %w0, [%1]",
> - ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> - : "=r" (val) : "r" (addr));
> + asm_inline volatile (
> + ALTERNATIVE("ldrh %w0, [%1]",
> + "ldarh %w0, [%1]",
> + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> + : "=r" (val) : "r" (addr) );
> +
> return val;
> }
>
> static inline u32 __raw_readl(const volatile void __iomem *addr)
> {
> u32 val;
> - asm volatile(ALTERNATIVE("ldr %w0, [%1]",
> - "ldar %w0, [%1]",
> - ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> - : "=r" (val) : "r" (addr));
> +
> + asm_inline volatile (
> + ALTERNATIVE("ldr %w0, [%1]",
> + "ldar %w0, [%1]",
> + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> + : "=r" (val) : "r" (addr) );
> +
> return val;
> }
>
> static inline u64 __raw_readq(const volatile void __iomem *addr)
> {
> u64 val;
> - asm volatile(ALTERNATIVE("ldr %0, [%1]",
> - "ldar %0, [%1]",
> - ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> - : "=r" (val) : "r" (addr));
> +
> + asm_inline volatile (
> + ALTERNATIVE("ldr %0, [%1]",
> + "ldar %0, [%1]",
> + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> + : "=r" (val) : "r" (addr) );
> +
> return val;
> }
>
> diff --git a/xen/arch/arm/include/asm/cpuerrata.h b/xen/arch/arm/include/asm/cpuerrata.h
> index 8d7e7b9375bd..1799a16d7e7f 100644
> --- a/xen/arch/arm/include/asm/cpuerrata.h
> +++ b/xen/arch/arm/include/asm/cpuerrata.h
> @@ -16,10 +16,10 @@ static inline bool check_workaround_##erratum(void) \
> { \
> register_t ret; \
> \
> - asm volatile (ALTERNATIVE("mov %0, #0", \
> - "mov %0, #1", \
> - feature) \
> - : "=r" (ret)); \
> + asm_inline volatile ( \
> + ALTERNATIVE("mov %0, #0", \
> + "mov %0, #1", feature) \
> + : "=r" (ret) ); \
> \
> return unlikely(ret); \
> } \
> diff --git a/xen/arch/arm/include/asm/cpufeature.h b/xen/arch/arm/include/asm/cpufeature.h
> index 50297e53d90e..b6df18801166 100644
> --- a/xen/arch/arm/include/asm/cpufeature.h
> +++ b/xen/arch/arm/include/asm/cpufeature.h
> @@ -102,10 +102,10 @@ static inline bool cpus_have_cap(unsigned int num)
> #define cpus_have_const_cap(num) ({ \
> register_t __ret; \
> \
> - asm volatile (ALTERNATIVE("mov %0, #0", \
> - "mov %0, #1", \
> - num) \
> - : "=r" (__ret)); \
> + asm_inline volatile ( \
> + ALTERNATIVE("mov %0, #0", \
> + "mov %0, #1", num) \
> + : "=r" (__ret) ); \
> \
> unlikely(__ret); \
> })
> diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h
> index 69f817d1e68a..27bc96b9f401 100644
> --- a/xen/arch/arm/include/asm/page.h
> +++ b/xen/arch/arm/include/asm/page.h
> @@ -176,7 +176,8 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
> {
> size -= dcache_line_bytes - ((uintptr_t)p & cacheline_mask);
> p = (void *)((uintptr_t)p & ~cacheline_mask);
> - asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
> + asm_inline volatile (
> + __clean_and_invalidate_dcache_one(0) :: "r" (p) );
> p += dcache_line_bytes;
> }
>
> @@ -185,7 +186,8 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
> asm volatile (__invalidate_dcache_one(0) : : "r" (p + idx));
>
> if ( size > 0 )
> - asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx));
> + asm_inline volatile (
> + __clean_and_invalidate_dcache_one(0) :: "r" (p + idx) );
>
> dsb(sy); /* So we know the flushes happen before continuing */
>
> @@ -209,7 +211,7 @@ static inline int clean_dcache_va_range(const void *p, unsigned long size)
> p = (void *)((uintptr_t)p & ~cacheline_mask);
> for ( ; size >= dcache_line_bytes;
> idx += dcache_line_bytes, size -= dcache_line_bytes )
> - asm volatile (__clean_dcache_one(0) : : "r" (p + idx));
> + asm_inline volatile ( __clean_dcache_one(0) : : "r" (p + idx) );
> dsb(sy); /* So we know the flushes happen before continuing */
> /* ARM callers assume that dcache_* functions cannot fail. */
> return 0;
> @@ -247,7 +249,7 @@ static inline int clean_and_invalidate_dcache_va_range
> if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \
> clean_dcache_va_range(_p, sizeof(x)); \
> else \
> - asm volatile ( \
> + asm_inline volatile ( \
> "dsb sy;" /* Finish all earlier writes */ \
> __clean_dcache_one(0) \
> "dsb sy;" /* Finish flush before continuing */ \
> @@ -259,7 +261,7 @@ static inline int clean_and_invalidate_dcache_va_range
> if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \
> clean_and_invalidate_dcache_va_range(_p, sizeof(x)); \
> else \
> - asm volatile ( \
> + asm_inline volatile ( \
> "dsb sy;" /* Finish all earlier writes */ \
> __clean_and_invalidate_dcache_one(0) \
> "dsb sy;" /* Finish flush before continuing */ \
> diff --git a/xen/arch/arm/include/asm/processor.h b/xen/arch/arm/include/asm/processor.h
> index 60b587db697f..9cbc4f911061 100644
> --- a/xen/arch/arm/include/asm/processor.h
> +++ b/xen/arch/arm/include/asm/processor.h
> @@ -607,9 +607,10 @@ register_t get_default_cptr_flags(void);
> #define SYNCHRONIZE_SERROR(feat) \
> do { \
> ASSERT(local_abort_is_enabled()); \
> - asm volatile(ALTERNATIVE("dsb sy; isb", \
> - "nop; nop", feat) \
> - : : : "memory"); \
> + asm_inline volatile ( \
> + ALTERNATIVE("dsb sy; isb", \
> + "nop; nop", feat) \
> + ::: "memory" ); \
> } while (0)
>
> /*
> diff --git a/xen/arch/arm/include/asm/sysregs.h b/xen/arch/arm/include/asm/sysregs.h
> index 61e30c9e517c..5c2d362be3d8 100644
> --- a/xen/arch/arm/include/asm/sysregs.h
> +++ b/xen/arch/arm/include/asm/sysregs.h
> @@ -22,11 +22,13 @@ static inline register_t read_sysreg_par(void)
> * DMB SY before and after accessing it, as part of the workaround for the
> * errata 1508412.
> */
> - asm volatile(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
> - CONFIG_ARM64_ERRATUM_1508412));
> + asm_inline volatile (
> + ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
> + CONFIG_ARM64_ERRATUM_1508412) );
> par_el1 = READ_SYSREG64(PAR_EL1);
> - asm volatile(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
> - CONFIG_ARM64_ERRATUM_1508412));
> + asm_inline volatile (
> + ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
> + CONFIG_ARM64_ERRATUM_1508412) );
>
> return par_el1;
> }
> diff --git a/xen/arch/arm/mmu/p2m.c b/xen/arch/arm/mmu/p2m.c
> index 7642dbc7c55b..d96078f547d5 100644
> --- a/xen/arch/arm/mmu/p2m.c
> +++ b/xen/arch/arm/mmu/p2m.c
> @@ -228,7 +228,8 @@ void p2m_restore_state(struct vcpu *n)
> * registers associated to EL1/EL0 translations regime have been
> * synchronized.
> */
> - asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));
> + asm_inline volatile (
> + ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE) );
> WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
>
> last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
> diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
> index 38472fb58e2d..4c8be51d0e23 100644
> --- a/xen/arch/x86/include/asm/alternative.h
> +++ b/xen/arch/x86/include/asm/alternative.h
> @@ -116,12 +116,13 @@ extern void alternative_branches(void);
> * without volatile and memory clobber.
> */
> #define alternative(oldinstr, newinstr, feature) \
> - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
> + asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
> + ::: "memory" )
>
> #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
> - asm volatile (ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
> - newinstr2, feature2) \
> - : : : "memory")
> + asm_inline volatile ( ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
> + newinstr2, feature2) \
> + ::: "memory" )
>
> /*
> * Alternative inline assembly with input.
> @@ -133,14 +134,14 @@ extern void alternative_branches(void);
> * If you use variable sized constraints like "m" or "g" in the
> * replacement make sure to pad to the worst case length.
> */
> -#define alternative_input(oldinstr, newinstr, feature, input...) \
> - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
> - : : input)
> +#define alternative_input(oldinstr, newinstr, feature, input...) \
> + asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
> + :: input )
>
> /* Like alternative_input, but with a single output argument */
> -#define alternative_io(oldinstr, newinstr, feature, output, input...) \
> - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
> - : output : input)
> +#define alternative_io(oldinstr, newinstr, feature, output, input...) \
> + asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
> + : output : input )
>
> /*
> * This is similar to alternative_io. But it has two features and
> @@ -150,11 +151,11 @@ extern void alternative_branches(void);
> * Otherwise, if CPU has feature1, newinstr1 is used.
> * Otherwise, oldinstr is used.
> */
> -#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2, \
> - feature2, output, input...) \
> - asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
> - newinstr2, feature2) \
> - : output : input)
> +#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2, \
> + feature2, output, input...) \
> + asm_inline volatile ( ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
> + newinstr2, feature2) \
> + : output : input )
>
> /* Use this macro(s) if you need more than one output parameter. */
> #define ASM_OUTPUT2(a...) a
> @@ -234,8 +235,9 @@ extern void alternative_branches(void);
> rettype ret_; \
> register unsigned long r10_ asm("r10"); \
> register unsigned long r11_ asm("r11"); \
> - asm volatile (ALTERNATIVE("call *%c[addr](%%rip)", "call .", \
> - X86_FEATURE_ALWAYS) \
> + asm_inline volatile ( \
> + ALTERNATIVE("call *%c[addr](%%rip)", "call .", \
> + X86_FEATURE_ALWAYS) \
> : ALT_CALL ## n ## _OUT, "=a" (ret_), \
> "=r" (r10_), "=r" (r11_) ASM_CALL_CONSTRAINT \
> : [addr] "i" (&(func)), "g" (func) \
> diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
> index c68fab189154..d9b133016bb6 100644
> --- a/xen/include/xen/compiler.h
> +++ b/xen/include/xen/compiler.h
> @@ -53,6 +53,20 @@
> #define unreachable() __builtin_unreachable()
> #endif
>
> +/*
> + * Compilers estimate the size of an asm() block for inlining purposes.
> + * Constructs such as ALTERNATIVE appear large due to the metadata, despite
> + * often only being a handful of instructions. asm inline() overrides the
> + * estimation to identify the block as being small.
> + *
> + * Note: __inline is needed to avoid getting caught up in INIT_SECTIONS_ONLY.
> + */
> +#if CONFIG_CC_HAS_ASM_INLINE
> +# define asm_inline asm __inline
> +#else
> +# define asm_inline asm
> +#endif
> +
> /*
> * Add the pseudo keyword 'fallthrough' so case statement blocks
> * must end with any of these keywords:
> --
> 2.39.5
>
On 22/04/2025 8:46 pm, Stefano Stabellini wrote:
> On Tue, 22 Apr 2025, Andrew Cooper wrote:
>> Compilers estimate the size of an asm() block for inlining purposes.
>>
>> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
>> only being a handful of instructions. asm inline() overrides the estimation
>> to identify the block as being small.
>>
>> This has a substantial impact on inlining decisions, expected to be for the
>> better given that the compiler has a more accurate picture to work with.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Hi Andrew,
>
> If we are going to use asm_inline, please add a note to
> docs/misra/C-language-toolchain.rst where we keep record of all the
> language extensions we use.

It's just asm(), and that's already discussed.

I'm not sure what else you think is warranted.

~Andrew
On Tue, 22 Apr 2025, Andrew Cooper wrote:
> On 22/04/2025 8:46 pm, Stefano Stabellini wrote:
> > On Tue, 22 Apr 2025, Andrew Cooper wrote:
> >> Compilers estimate the size of an asm() block for inlining purposes.
> >>
> >> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
> >> only being a handful of instructions. asm inline() overrides the estimation
> >> to identify the block as being small.
> >>
> >> This has a substantial impact on inlining decisions, expected to be for the
> >> better given that the compiler has a more accurate picture to work with.
> >>
> >> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> > Hi Andrew,
> >
> > If we are going to use asm_inline, please add a note to
> > docs/misra/C-language-toolchain.rst where we keep record of all the
> > language extensions we use.
>
> It's just asm(), and that's already discussed.
>
> I'm not sure what else you think is warranted.

If it is just asm() then there is nothing to do. We only need to
document extensions to the language, nothing else. Many of them are
already documented under docs/misra/C-language-toolchain.rst (see for
instance asm and __asm__).

I see the critical part of the patch for this question is:

> +#if CONFIG_CC_HAS_ASM_INLINE
> +# define asm_inline asm __inline
> +#else
> +# define asm_inline asm
> +#endif

it looks like __inline is an extension ?
On 22/04/2025 8:58 pm, Stefano Stabellini wrote:
> On Tue, 22 Apr 2025, Andrew Cooper wrote:
>> On 22/04/2025 8:46 pm, Stefano Stabellini wrote:
>>> On Tue, 22 Apr 2025, Andrew Cooper wrote:
>>>> Compilers estimate the size of an asm() block for inlining purposes.
>>>>
>>>> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
>>>> only being a handful of instructions. asm inline() overrides the estimation
>>>> to identify the block as being small.
>>>>
>>>> This has a substantial impact on inlining decisions, expected to be for the
>>>> better given that the compiler has a more accurate picture to work with.
>>>>
>>>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>> Hi Andrew,
>>>
>>> If we are going to use asm_inline, please add a note to
>>> docs/misra/C-language-toolchain.rst where we keep record of all the
>>> language extensions we use.
>> It's just asm(), and that's already discussed.
>>
>> I'm not sure what else you think is warranted.
>
> If it is just asm() then there is nothing to do. We only need to
> document extensions to the language, nothing else. Many of them are
> already documented under docs/misra/C-language-toolchain.rst (see for
> instance asm and __asm__).
>
> I see the critical part of the patch for this question is:
>
>> +#if CONFIG_CC_HAS_ASM_INLINE
>> +# define asm_inline asm __inline
>> +#else
>> +# define asm_inline asm
>> +#endif
> it looks like __inline is an extension ?

That's just inline.  (It's an alternative spelling, and we discuss those
too.)

The reason it's not plain "asm inline" is because of the (deviated)
"#define inline inline __init", where an eventual "asm inline __init (..."
is a syntax error.

~Andrew
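
A minimal sketch of the clash described above, using simplified stand-ins
rather than Xen's exact INIT_SECTIONS_ONLY machinery (the macro bodies and
the demo function are illustrative, not Xen's real definitions):

    /* Stand-in for the deviated "#define inline inline __init". */
    #define __init  __attribute__((__section__(".init.text")))
    #define inline  inline __init

    void demo(void)
    {
        /*
         * With the macro above in scope, "asm inline ("nop");" would expand
         * to "asm inline __attribute__((...)) ("nop");", which the compiler
         * rejects.  The alternative spelling is not redefined, so it is
         * unaffected:
         */
        asm __inline ("nop");
    }
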
On 2025-04-22 21:58, Stefano Stabellini wrote:
> On Tue, 22 Apr 2025, Andrew Cooper wrote:
>> On 22/04/2025 8:46 pm, Stefano Stabellini wrote:
>> > On Tue, 22 Apr 2025, Andrew Cooper wrote:
>> >> Compilers estimate the size of an asm() block for inlining purposes.
>> >>
>> >> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
>> >> only being a handful of instructions. asm inline() overrides the estimation
>> >> to identify the block as being small.
>> >>
>> >> This has a substantial impact on inlining decisions, expected to be for the
>> >> better given that the compiler has a more accurate picture to work with.
>> >>
>> >> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> > Hi Andrew,
>> >
>> > If we are going to use asm_inline, please add a note to
>> > docs/misra/C-language-toolchain.rst where we keep record of all the
>> > language extensions we use.
>>
>> It's just asm(), and that's already discussed.
>>
>> I'm not sure what else you think is warranted.
>
>
> If it is just asm() then there is nothing to do. We only need to
> document extensions to the language, nothing else. Many of them are
> already documented under docs/misra/C-language-toolchain.rst (see for
> instance asm and __asm__).
>
> I see the critical part of the patch for this question is:
>
>> +#if CONFIG_CC_HAS_ASM_INLINE
>> +# define asm_inline asm __inline
>> +#else
>> +# define asm_inline asm
>> +#endif
>
> it looks like __inline is an extension ?

I see. It was added to the tool configuration, but not there in the rst
(__inline__ as well), hence why there are no CI failures.

--
Nicola Vetrini, B.Sc.
Software Engineer
BUGSENG (https://bugseng.com)
LinkedIn: https://www.linkedin.com/in/nicola-vetrini-a42471253
On Tue, 22 Apr 2025, Nicola Vetrini wrote:
> On 2025-04-22 21:58, Stefano Stabellini wrote:
> > On Tue, 22 Apr 2025, Andrew Cooper wrote:
> > > On 22/04/2025 8:46 pm, Stefano Stabellini wrote:
> > > > On Tue, 22 Apr 2025, Andrew Cooper wrote:
> > > >> Compilers estimate the size of an asm() block for inlining purposes.
> > > >>
> > > >> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
> > > >> only being a handful of instructions. asm inline() overrides the estimation
> > > >> to identify the block as being small.
> > > >>
> > > >> This has a substantial impact on inlining decisions, expected to be for the
> > > >> better given that the compiler has a more accurate picture to work with.
> > > >>
> > > >> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> > > > Hi Andrew,
> > > >
> > > > If we are going to use asm_inline, please add a note to
> > > > docs/misra/C-language-toolchain.rst where we keep record of all the
> > > > language extensions we use.
> > >
> > > It's just asm(), and that's already discussed.
> > >
> > > I'm not sure what else you think is warranted.
> >
> >
> > If it is just asm() then there is nothing to do. We only need to
> > document extensions to the language, nothing else. Many of them are
> > already documented under docs/misra/C-language-toolchain.rst (see for
> > instance asm and __asm__).
> >
> > I see the critical part of the patch for this question is:
> >
> > > +#if CONFIG_CC_HAS_ASM_INLINE
> > > +# define asm_inline asm __inline
> > > +#else
> > > +# define asm_inline asm
> > > +#endif
> >
> > it looks like __inline is an extension ?
>
> I see. It was added to the tool configuration, but not there in the rst
> (__inline__ as well), hence why there are no CI failures.

I see __inline__|__inline in toolchain.ecl. Nicola, would you be up for
sending a small patch to docs/misra/C-language-toolchain.rst to add
them?
On 2025-04-22 22:02, Stefano Stabellini wrote:
> On Tue, 22 Apr 2025, Nicola Vetrini wrote:
>> On 2025-04-22 21:58, Stefano Stabellini wrote:
>> > On Tue, 22 Apr 2025, Andrew Cooper wrote:
>> > > On 22/04/2025 8:46 pm, Stefano Stabellini wrote:
>> > > > On Tue, 22 Apr 2025, Andrew Cooper wrote:
>> > > >> Compilers estimate the size of an asm() block for inlining purposes.
>> > > >>
>> > > >> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
>> > > >> only being a handful of instructions. asm inline() overrides the estimation
>> > > >> to identify the block as being small.
>> > > >>
>> > > >> This has a substantial impact on inlining decisions, expected to be for the
>> > > >> better given that the compiler has a more accurate picture to work with.
>> > > >>
>> > > >> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> > > > Hi Andrew,
>> > > >
>> > > > If we are going to use asm_inline, please add a note to
>> > > > docs/misra/C-language-toolchain.rst where we keep record of all the
>> > > > language extensions we use.
>> > >
>> > > It's just asm(), and that's already discussed.
>> > >
>> > > I'm not sure what else you think is warranted.
>> >
>> >
>> > If it is just asm() then there is nothing to do. We only need to
>> > document extensions to the language, nothing else. Many of them are
>> > already documented under docs/misra/C-language-toolchain.rst (see for
>> > instance asm and __asm__).
>> >
>> > I see the critical part of the patch for this question is:
>> >
>> > > +#if CONFIG_CC_HAS_ASM_INLINE
>> > > +# define asm_inline asm __inline
>> > > +#else
>> > > +# define asm_inline asm
>> > > +#endif
>> >
>> > it looks like __inline is an extension ?
>>
>> I see. It was added to the tool configuration, but not there in the rst
>> (__inline__ as well), hence why there are no CI failures.
>
> I see __inline__|__inline in toolchain.ecl. Nicola, would you be up for
> sending a small patch to docs/misra/C-language-toolchain.rst to add them?

Yes, no problem. I'll take the occasion to see whether there are other gaps.

Perhaps there's a clever way to ensure these files do not go out of sync?

--
Nicola Vetrini, B.Sc.
Software Engineer
BUGSENG (https://bugseng.com)
LinkedIn: https://www.linkedin.com/in/nicola-vetrini-a42471253
On Tue, 22 Apr 2025, Nicola Vetrini wrote:
> On 2025-04-22 22:02, Stefano Stabellini wrote:
> > On Tue, 22 Apr 2025, Nicola Vetrini wrote:
> > > On 2025-04-22 21:58, Stefano Stabellini wrote:
> > > > On Tue, 22 Apr 2025, Andrew Cooper wrote:
> > > > > On 22/04/2025 8:46 pm, Stefano Stabellini wrote:
> > > > > > On Tue, 22 Apr 2025, Andrew Cooper wrote:
> > > > > >> Compilers estimate the size of an asm() block for inlining purposes.
> > > > > >>
> > > > > >> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
> > > > > >> only being a handful of instructions. asm inline() overrides the estimation
> > > > > >> to identify the block as being small.
> > > > > >>
> > > > > >> This has a substantial impact on inlining decisions, expected to be for the
> > > > > >> better given that the compiler has a more accurate picture to work with.
> > > > > >>
> > > > > >> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> > > > > > Hi Andrew,
> > > > > >
> > > > > > If we are going to use asm_inline, please add a note to
> > > > > > docs/misra/C-language-toolchain.rst where we keep record of all the
> > > > > > language extensions we use.
> > > > >
> > > > > It's just asm(), and that's already discussed.
> > > > >
> > > > > I'm not sure what else you think is warranted.
> > > >
> > > >
> > > > If it is just asm() then there is nothing to do. We only need to
> > > > document extensions to the language, nothing else. Many of them are
> > > > already documented under docs/misra/C-language-toolchain.rst (see for
> > > > instance asm and __asm__).
> > > >
> > > > I see the critical part of the patch for this question is:
> > > >
> > > > > +#if CONFIG_CC_HAS_ASM_INLINE
> > > > > +# define asm_inline asm __inline
> > > > > +#else
> > > > > +# define asm_inline asm
> > > > > +#endif
> > > >
> > > > it looks like __inline is an extension ?
> > >
> > > I see. It was added to the tool configuration, but not there in the rst
> > > (__inline__ as well), hence why there are no CI failures.
> >
> > I see __inline__|__inline in toolchain.ecl. Nicola, would you be up for
> > sending a small patch to docs/misra/C-language-toolchain.rst to add them?
>
> Yes, no problem. I'll take the occasion to see whether there are other gaps.

Thanks!

> Perhaps there's a clever way to ensure these files do not go out of sync?

It would take a bit of work, but we could have the list of extensions in
json (like exclude-list.json or safe.json), then we could generate both
C-language-toolchain.rst and toolchain.ecl from the json file.
On 22.04.2025 13:39, Andrew Cooper wrote:
> Compilers estimate the size of an asm() block for inlining purposes.
>
> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
> only being a handful of instructions. asm inline() overrides the estimation
> to identify the block as being small.
>
> This has a substantial impact on inlining decisions, expected to be for the
> better given that the compiler has a more accurate picture to work with.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
So this is effectively a generalized version of
https://lists.xen.org/archives/html/xen-devel/2022-08/msg00712.html
with ...
> --- a/xen/Kconfig
> +++ b/xen/Kconfig
> @@ -29,6 +29,10 @@ config LD_IS_GNU
> config LD_IS_LLVM
> def_bool $(success,$(LD) --version | head -n 1 | grep -q "^LLD")
>
> +config CC_HAS_ASM_INLINE
> + # GCC >= 9, Clang >= 11
> + def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
... detection once again done in Kconfig (only). I don't think I had any
feedback there; I'm hence having a hard time seeing why I should now like
this any better than mine was liked.
In any event it's clearly increasingly necessary that we finally get the
detection aspect settled.
Jan
On 22/04/2025 1:52 pm, Jan Beulich wrote:
> On 22.04.2025 13:39, Andrew Cooper wrote:
>> Compilers estimate the size of an asm() block for inlining purposes.
>>
>> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
>> only being a handful of instructions. asm inline() overrides the estimation
>> to identify the block as being small.
>>
>> This has a substantial impact on inlining decisions, expected to be for the
>> better given that the compiler has a more accurate picture to work with.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> So this is effectively a generalized version of
> https://lists.xen.org/archives/html/xen-devel/2022-08/msg00712.html

I'm sorry I missed this patch, but my feedback would have been "that's
not specific to altcall".

~Andrew
On Tue, Apr 22, 2025 at 12:40 PM Andrew Cooper
<andrew.cooper3@citrix.com> wrote:
>
> Compilers estimate the size of an asm() block for inlining purposes.
>
> Constructs such as ALTERNATIVE appear large due to the metadata, despite often
> only being a handful of instructions. asm inline() overrides the estimation
> to identify the block as being small.
>
> This has a substantial impact on inlining decisions, expected to be for the
> better given that the compiler has a more accurate picture to work with.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Stefano Stabellini <sstabellini@kernel.org>
> CC: Julien Grall <julien@xen.org>
> CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> CC: Bertrand Marquis <bertrand.marquis@arm.com>
> CC: Michal Orzel <michal.orzel@amd.com>
> ---
> xen/Kconfig | 4 +++
> xen/arch/arm/include/asm/alternative.h | 4 +--
> xen/arch/arm/include/asm/arm64/flushtlb.h | 4 +--
> xen/arch/arm/include/asm/arm64/io.h | 43 ++++++++++++++---------
> xen/arch/arm/include/asm/cpuerrata.h | 8 ++---
> xen/arch/arm/include/asm/cpufeature.h | 8 ++---
> xen/arch/arm/include/asm/page.h | 12 ++++---
> xen/arch/arm/include/asm/processor.h | 7 ++--
> xen/arch/arm/include/asm/sysregs.h | 10 +++---
> xen/arch/arm/mmu/p2m.c | 3 +-
> xen/arch/x86/include/asm/alternative.h | 36 ++++++++++---------
> xen/include/xen/compiler.h | 14 ++++++++
> 12 files changed, 95 insertions(+), 58 deletions(-)
>
> diff --git a/xen/Kconfig b/xen/Kconfig
> index ae1c401a981e..ab4ab42ae987 100644
> --- a/xen/Kconfig
> +++ b/xen/Kconfig
> @@ -29,6 +29,10 @@ config LD_IS_GNU
> config LD_IS_LLVM
> def_bool $(success,$(LD) --version | head -n 1 | grep -q "^LLD")
>
> +config CC_HAS_ASM_INLINE
> + # GCC >= 9, Clang >= 11
> + def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
> +
Why not check for "asm __inline" instead of "asm inline" ? The
asm_inline macro below uses "asm __inline".
They are mainly synonyms but it would look more coherent.
> # Use -f{function,data}-sections compiler parameters
> config CC_SPLIT_SECTIONS
> bool
> diff --git a/xen/arch/arm/include/asm/alternative.h b/xen/arch/arm/include/asm/alternative.h
> index 22477d9497a3..1563f03a0f5a 100644
> --- a/xen/arch/arm/include/asm/alternative.h
> +++ b/xen/arch/arm/include/asm/alternative.h
> @@ -209,9 +209,9 @@ alternative_endif
> #endif /* __ASSEMBLY__ */
>
> /*
> - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
> + * Usage: asm_inline (ALTERNATIVE(oldinstr, newinstr, feature));
> *
> - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
> + * Usage: asm_inline (ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
> * N.B. If CONFIG_FOO is specified, but not selected, the whole block
> * will be omitted, including oldinstr.
> */
> diff --git a/xen/arch/arm/include/asm/arm64/flushtlb.h b/xen/arch/arm/include/asm/arm64/flushtlb.h
> index 45642201d147..3b99c11b50d1 100644
> --- a/xen/arch/arm/include/asm/arm64/flushtlb.h
> +++ b/xen/arch/arm/include/asm/arm64/flushtlb.h
> @@ -31,7 +31,7 @@
> #define TLB_HELPER(name, tlbop, sh) \
> static inline void name(void) \
> { \
> - asm volatile( \
> + asm_inline volatile ( \
> "dsb " # sh "st;" \
> "tlbi " # tlbop ";" \
> ALTERNATIVE( \
> @@ -55,7 +55,7 @@ static inline void name(void) \
> #define TLB_HELPER_VA(name, tlbop) \
> static inline void name(vaddr_t va) \
> { \
> - asm volatile( \
> + asm_inline volatile ( \
> "tlbi " # tlbop ", %0;" \
> ALTERNATIVE( \
> "nop; nop;", \
> diff --git a/xen/arch/arm/include/asm/arm64/io.h b/xen/arch/arm/include/asm/arm64/io.h
> index 7d5959877759..ac90b729c44d 100644
> --- a/xen/arch/arm/include/asm/arm64/io.h
> +++ b/xen/arch/arm/include/asm/arm64/io.h
> @@ -51,40 +51,51 @@ static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
> static inline u8 __raw_readb(const volatile void __iomem *addr)
> {
> u8 val;
> - asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
> - "ldarb %w0, [%1]",
> - ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> - : "=r" (val) : "r" (addr));
> +
> + asm_inline volatile (
> + ALTERNATIVE("ldrb %w0, [%1]",
> + "ldarb %w0, [%1]",
> + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> + : "=r" (val) : "r" (addr) );
> +
> return val;
> }
>
> static inline u16 __raw_readw(const volatile void __iomem *addr)
> {
> u16 val;
> - asm volatile(ALTERNATIVE("ldrh %w0, [%1]",
> - "ldarh %w0, [%1]",
> - ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> - : "=r" (val) : "r" (addr));
> + asm_inline volatile (
> + ALTERNATIVE("ldrh %w0, [%1]",
> + "ldarh %w0, [%1]",
> + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> + : "=r" (val) : "r" (addr) );
> +
> return val;
> }
>
> static inline u32 __raw_readl(const volatile void __iomem *addr)
> {
> u32 val;
> - asm volatile(ALTERNATIVE("ldr %w0, [%1]",
> - "ldar %w0, [%1]",
> - ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> - : "=r" (val) : "r" (addr));
> +
> + asm_inline volatile (
> + ALTERNATIVE("ldr %w0, [%1]",
> + "ldar %w0, [%1]",
> + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> + : "=r" (val) : "r" (addr) );
> +
> return val;
> }
>
> static inline u64 __raw_readq(const volatile void __iomem *addr)
> {
> u64 val;
> - asm volatile(ALTERNATIVE("ldr %0, [%1]",
> - "ldar %0, [%1]",
> - ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> - : "=r" (val) : "r" (addr));
> +
> + asm_inline volatile (
> + ALTERNATIVE("ldr %0, [%1]",
> + "ldar %0, [%1]",
> + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
> + : "=r" (val) : "r" (addr) );
> +
> return val;
> }
>
> diff --git a/xen/arch/arm/include/asm/cpuerrata.h b/xen/arch/arm/include/asm/cpuerrata.h
> index 8d7e7b9375bd..1799a16d7e7f 100644
> --- a/xen/arch/arm/include/asm/cpuerrata.h
> +++ b/xen/arch/arm/include/asm/cpuerrata.h
> @@ -16,10 +16,10 @@ static inline bool check_workaround_##erratum(void) \
> { \
> register_t ret; \
> \
> - asm volatile (ALTERNATIVE("mov %0, #0", \
> - "mov %0, #1", \
> - feature) \
> - : "=r" (ret)); \
> + asm_inline volatile ( \
> + ALTERNATIVE("mov %0, #0", \
> + "mov %0, #1", feature) \
> + : "=r" (ret) ); \
> \
> return unlikely(ret); \
> } \
> diff --git a/xen/arch/arm/include/asm/cpufeature.h b/xen/arch/arm/include/asm/cpufeature.h
> index 50297e53d90e..b6df18801166 100644
> --- a/xen/arch/arm/include/asm/cpufeature.h
> +++ b/xen/arch/arm/include/asm/cpufeature.h
> @@ -102,10 +102,10 @@ static inline bool cpus_have_cap(unsigned int num)
> #define cpus_have_const_cap(num) ({ \
> register_t __ret; \
> \
> - asm volatile (ALTERNATIVE("mov %0, #0", \
> - "mov %0, #1", \
> - num) \
> - : "=r" (__ret)); \
> + asm_inline volatile ( \
> + ALTERNATIVE("mov %0, #0", \
> + "mov %0, #1", num) \
> + : "=r" (__ret) ); \
> \
> unlikely(__ret); \
> })
> diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h
> index 69f817d1e68a..27bc96b9f401 100644
> --- a/xen/arch/arm/include/asm/page.h
> +++ b/xen/arch/arm/include/asm/page.h
> @@ -176,7 +176,8 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
> {
> size -= dcache_line_bytes - ((uintptr_t)p & cacheline_mask);
> p = (void *)((uintptr_t)p & ~cacheline_mask);
> - asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
> + asm_inline volatile (
> + __clean_and_invalidate_dcache_one(0) :: "r" (p) );
> p += dcache_line_bytes;
> }
>
> @@ -185,7 +186,8 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
> asm volatile (__invalidate_dcache_one(0) : : "r" (p + idx));
>
> if ( size > 0 )
> - asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx));
> + asm_inline volatile (
> + __clean_and_invalidate_dcache_one(0) :: "r" (p + idx) );
>
> dsb(sy); /* So we know the flushes happen before continuing */
>
> @@ -209,7 +211,7 @@ static inline int clean_dcache_va_range(const void *p, unsigned long size)
> p = (void *)((uintptr_t)p & ~cacheline_mask);
> for ( ; size >= dcache_line_bytes;
> idx += dcache_line_bytes, size -= dcache_line_bytes )
> - asm volatile (__clean_dcache_one(0) : : "r" (p + idx));
> + asm_inline volatile ( __clean_dcache_one(0) : : "r" (p + idx) );
> dsb(sy); /* So we know the flushes happen before continuing */
> /* ARM callers assume that dcache_* functions cannot fail. */
> return 0;
> @@ -247,7 +249,7 @@ static inline int clean_and_invalidate_dcache_va_range
> if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \
> clean_dcache_va_range(_p, sizeof(x)); \
> else \
> - asm volatile ( \
> + asm_inline volatile ( \
> "dsb sy;" /* Finish all earlier writes */ \
> __clean_dcache_one(0) \
> "dsb sy;" /* Finish flush before continuing */ \
> @@ -259,7 +261,7 @@ static inline int clean_and_invalidate_dcache_va_range
> if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \
> clean_and_invalidate_dcache_va_range(_p, sizeof(x)); \
> else \
> - asm volatile ( \
> + asm_inline volatile ( \
> "dsb sy;" /* Finish all earlier writes */ \
> __clean_and_invalidate_dcache_one(0) \
> "dsb sy;" /* Finish flush before continuing */ \
> diff --git a/xen/arch/arm/include/asm/processor.h b/xen/arch/arm/include/asm/processor.h
> index 60b587db697f..9cbc4f911061 100644
> --- a/xen/arch/arm/include/asm/processor.h
> +++ b/xen/arch/arm/include/asm/processor.h
> @@ -607,9 +607,10 @@ register_t get_default_cptr_flags(void);
> #define SYNCHRONIZE_SERROR(feat) \
> do { \
> ASSERT(local_abort_is_enabled()); \
> - asm volatile(ALTERNATIVE("dsb sy; isb", \
> - "nop; nop", feat) \
> - : : : "memory"); \
> + asm_inline volatile ( \
> + ALTERNATIVE("dsb sy; isb", \
> + "nop; nop", feat) \
> + ::: "memory" ); \
> } while (0)
>
> /*
> diff --git a/xen/arch/arm/include/asm/sysregs.h b/xen/arch/arm/include/asm/sysregs.h
> index 61e30c9e517c..5c2d362be3d8 100644
> --- a/xen/arch/arm/include/asm/sysregs.h
> +++ b/xen/arch/arm/include/asm/sysregs.h
> @@ -22,11 +22,13 @@ static inline register_t read_sysreg_par(void)
> * DMB SY before and after accessing it, as part of the workaround for the
> * errata 1508412.
> */
> - asm volatile(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
> - CONFIG_ARM64_ERRATUM_1508412));
> + asm_inline volatile (
> + ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
> + CONFIG_ARM64_ERRATUM_1508412) );
> par_el1 = READ_SYSREG64(PAR_EL1);
> - asm volatile(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
> - CONFIG_ARM64_ERRATUM_1508412));
> + asm_inline volatile (
> + ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412,
> + CONFIG_ARM64_ERRATUM_1508412) );
>
> return par_el1;
> }
> diff --git a/xen/arch/arm/mmu/p2m.c b/xen/arch/arm/mmu/p2m.c
> index 7642dbc7c55b..d96078f547d5 100644
> --- a/xen/arch/arm/mmu/p2m.c
> +++ b/xen/arch/arm/mmu/p2m.c
> @@ -228,7 +228,8 @@ void p2m_restore_state(struct vcpu *n)
> * registers associated to EL1/EL0 translations regime have been
> * synchronized.
> */
> - asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));
> + asm_inline volatile (
> + ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE) );
> WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
>
> last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
> diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h
> index 38472fb58e2d..4c8be51d0e23 100644
> --- a/xen/arch/x86/include/asm/alternative.h
> +++ b/xen/arch/x86/include/asm/alternative.h
> @@ -116,12 +116,13 @@ extern void alternative_branches(void);
> * without volatile and memory clobber.
> */
> #define alternative(oldinstr, newinstr, feature) \
> - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
> + asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
> + ::: "memory" )
>
> #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
> - asm volatile (ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
> - newinstr2, feature2) \
> - : : : "memory")
> + asm_inline volatile ( ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
> + newinstr2, feature2) \
> + ::: "memory" )
>
> /*
> * Alternative inline assembly with input.
> @@ -133,14 +134,14 @@ extern void alternative_branches(void);
> * If you use variable sized constraints like "m" or "g" in the
> * replacement make sure to pad to the worst case length.
> */
> -#define alternative_input(oldinstr, newinstr, feature, input...) \
> - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
> - : : input)
> +#define alternative_input(oldinstr, newinstr, feature, input...) \
> + asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
> + :: input )
>
> /* Like alternative_input, but with a single output argument */
> -#define alternative_io(oldinstr, newinstr, feature, output, input...) \
> - asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
> - : output : input)
> +#define alternative_io(oldinstr, newinstr, feature, output, input...) \
> + asm_inline volatile ( ALTERNATIVE(oldinstr, newinstr, feature) \
> + : output : input )
>
> /*
> * This is similar to alternative_io. But it has two features and
> @@ -150,11 +151,11 @@ extern void alternative_branches(void);
> * Otherwise, if CPU has feature1, newinstr1 is used.
> * Otherwise, oldinstr is used.
> */
> -#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2, \
> - feature2, output, input...) \
> - asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
> - newinstr2, feature2) \
> - : output : input)
> +#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2, \
> + feature2, output, input...) \
> + asm_inline volatile ( ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
> + newinstr2, feature2) \
> + : output : input )
>
> /* Use this macro(s) if you need more than one output parameter. */
> #define ASM_OUTPUT2(a...) a
> @@ -234,8 +235,9 @@ extern void alternative_branches(void);
> rettype ret_; \
> register unsigned long r10_ asm("r10"); \
> register unsigned long r11_ asm("r11"); \
> - asm volatile (ALTERNATIVE("call *%c[addr](%%rip)", "call .", \
> - X86_FEATURE_ALWAYS) \
> + asm_inline volatile ( \
> + ALTERNATIVE("call *%c[addr](%%rip)", "call .", \
> + X86_FEATURE_ALWAYS) \
> : ALT_CALL ## n ## _OUT, "=a" (ret_), \
> "=r" (r10_), "=r" (r11_) ASM_CALL_CONSTRAINT \
> : [addr] "i" (&(func)), "g" (func) \
> diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
> index c68fab189154..d9b133016bb6 100644
> --- a/xen/include/xen/compiler.h
> +++ b/xen/include/xen/compiler.h
> @@ -53,6 +53,20 @@
> #define unreachable() __builtin_unreachable()
> #endif
>
> +/*
> + * Compilers estimate the size of an asm() block for inlining purposes.
> + * Constructs such as ALTERNATIVE appear large due to the metadata, despite
> + * often only being a handful of instructions. asm inline() overrides the
> + * estimation to identify the block as being small.
> + *
> + * Note: __inline is needed to avoid getting caught up in INIT_SECTIONS_ONLY.
> + */
> +#if CONFIG_CC_HAS_ASM_INLINE
> +# define asm_inline asm __inline
Here ^^^
> +#else
> +# define asm_inline asm
> +#endif
> +
> /*
> * Add the pseudo keyword 'fallthrough' so case statement blocks
> * must end with any of these keywords:
Frediano