[PATCH v7 05/21] x86/cea: Export API for per-CPU exception stacks for KVM

Xin Li (Intel) posted 21 patches 1 month ago
[PATCH v7 05/21] x86/cea: Export API for per-CPU exception stacks for KVM
Posted by Xin Li (Intel) 1 month ago
Convert the __this_cpu_ist_{top,bottom}_va() macros into proper functions,
and export __this_cpu_ist_top_va() to allow KVM to retrieve the top of the
per-CPU exception stack.

FRED introduced new fields in the host-state area of the VMCS for stack
levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively corresponding to
per-CPU exception stacks for #DB, NMI and #DF.  KVM must populate these
fields each time a vCPU is loaded onto a CPU.

To simplify access to the exception stacks in struct cea_exception_stacks,
a union is used to create an array alias, enabling array-style indexing of
the stack entries.

Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---

Changes in v7:
* Remove Suggested-bys (Dave Hansen).
* Move rename code in a separate patch (Dave Hansen).
* Access cea_exception_stacks using array indexing (Dave Hansen).
* Use BUILD_BUG_ON(ESTACK_DF != 0) to ensure the starting index is 0
  (Dave Hansen).

Changes in v5:
* Export accessor instead of data (Christoph Hellwig).
* Add TB from Xuelian Guo.

Change in v4:
* Rewrite the change log and add comments to the export (Dave Hansen).
---
 arch/x86/include/asm/cpu_entry_area.h | 51 +++++++++++++--------------
 arch/x86/mm/cpu_entry_area.c          | 25 +++++++++++++
 2 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index d0f884c28178..58cd71144e5e 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -16,6 +16,19 @@
 #define VC_EXCEPTION_STKSZ	0
 #endif
 
+/*
+ * The exception stack ordering in [cea_]exception_stacks
+ */
+enum exception_stack_ordering {
+	ESTACK_DF,
+	ESTACK_NMI,
+	ESTACK_DB,
+	ESTACK_MCE,
+	ESTACK_VC,
+	ESTACK_VC2,
+	N_EXCEPTION_STACKS
+};
+
 /* Macro to enforce the same ordering and stack sizes */
 #define ESTACKS_MEMBERS(guardsize, optional_stack_size)		\
 	char	ESTACK_DF_stack_guard[guardsize];		\
@@ -39,37 +52,29 @@ struct exception_stacks {
 
 /* The effective cpu entry area mapping with guard pages. */
 struct cea_exception_stacks {
-	ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
-};
-
-/*
- * The exception stack ordering in [cea_]exception_stacks
- */
-enum exception_stack_ordering {
-	ESTACK_DF,
-	ESTACK_NMI,
-	ESTACK_DB,
-	ESTACK_MCE,
-	ESTACK_VC,
-	ESTACK_VC2,
-	N_EXCEPTION_STACKS
+	union{
+		struct {
+			ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
+		};
+		struct {
+			char stack_guard[PAGE_SIZE];
+			char stack[EXCEPTION_STKSZ];
+		} event_stacks[N_EXCEPTION_STACKS];
+	};
 };
 
 #define CEA_ESTACK_SIZE(st)					\
 	sizeof(((struct cea_exception_stacks *)0)->st## _stack)
 
-#define CEA_ESTACK_BOT(ceastp, st)				\
-	((unsigned long)&(ceastp)->st## _stack)
-
-#define CEA_ESTACK_TOP(ceastp, st)				\
-	(CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
-
 #define CEA_ESTACK_OFFS(st)					\
 	offsetof(struct cea_exception_stacks, st## _stack)
 
 #define CEA_ESTACK_PAGES					\
 	(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
 
+extern unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack);
+extern unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack);
+
 #endif
 
 #ifdef CONFIG_X86_32
@@ -144,10 +149,4 @@ static __always_inline struct entry_stack *cpu_entry_stack(int cpu)
 	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
 }
 
-#define __this_cpu_ist_top_va(name)					\
-	CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
-
-#define __this_cpu_ist_bottom_va(name)					\
-	CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name)
-
 #endif
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 9fa371af8abc..595c2e03ddd5 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -18,6 +18,31 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
 
+/*
+ * FRED introduced new fields in the host-state area of the VMCS for
+ * stack levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively
+ * corresponding to per CPU stacks for #DB, NMI and #DF.  KVM must
+ * populate these each time a vCPU is loaded onto a CPU.
+ *
+ * Called from entry code, so must be noinstr.
+ */
+noinstr unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack)
+{
+	struct cea_exception_stacks *s;
+
+	BUILD_BUG_ON(ESTACK_DF != 0);
+
+	s = __this_cpu_read(cea_exception_stacks);
+
+	return (unsigned long)&s->event_stacks[stack].stack;
+}
+
+noinstr unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack)
+{
+	return __this_cpu_ist_bottom_va(stack) + EXCEPTION_STKSZ;
+}
+EXPORT_SYMBOL(__this_cpu_ist_top_va);
+
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
 
 static __always_inline unsigned int cea_offset(unsigned int cpu)
-- 
2.51.0
Re: [PATCH v7 05/21] x86/cea: Export API for per-CPU exception stacks for KVM
Posted by Xin Li 1 month ago
On 8/29/2025 8:31 AM, Xin Li (Intel) wrote:
> Convert the __this_cpu_ist_{top,bottom}_va() macros into proper functions,
> and export __this_cpu_ist_top_va() to allow KVM to retrieve the top of the
> per-CPU exception stack.
> 
> FRED introduced new fields in the host-state area of the VMCS for stack
> levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively corresponding to
> per-CPU exception stacks for #DB, NMI and #DF.  KVM must populate these
> fields each time a vCPU is loaded onto a CPU.
> 
> To simplify access to the exception stacks in struct cea_exception_stacks,
> a union is used to create an array alias, enabling array-style indexing of
> the stack entries.

After introducing array-style indexing, we can further simplify the code by
removing ESTACKS_MEMBERS() from struct cea_exception_stacks, as done in the
following patch.  However, including this change in the current patch set
may be distracting, so I plan to submit it separately at a later time.


commit b305b83ab90c77242030727139c9b2e04f4de11e
Author: Xin Li (Intel) <xin@zytor.com>
Date:   Fri Aug 29 12:22:35 2025 -0400

     x86/cea: Simplify cea_exception_stacks by removing ESTACKS_MEMBERS()

     With most accesses to cea_exception_stacks now using array-style indexing,
     the ESTACKS_MEMBERS() macro is no longer necessary in cea_exception_stacks
     and can be removed to streamline the structure and improve code
     readability.

     Remove the CEA_ESTACK_SIZE() macro as well: every stack in
     cea_exception_stacks is EXCEPTION_STKSZ bytes, so it is redundant.

     Signed-off-by: Xin Li (Intel) <xin@zytor.com>

diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 58cd71144e5e..509e52fc3a0f 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -52,22 +52,15 @@ struct exception_stacks {

  /* The effective cpu entry area mapping with guard pages. */
  struct cea_exception_stacks {
-	union{
-		struct {
-			ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
-		};
-		struct {
-			char stack_guard[PAGE_SIZE];
-			char stack[EXCEPTION_STKSZ];
-		} event_stacks[N_EXCEPTION_STACKS];
-	};
+	struct {
+		char stack_guard[PAGE_SIZE];
+		char stack[EXCEPTION_STKSZ];
+	} event_stacks[N_EXCEPTION_STACKS];
+	char IST_top_guard[PAGE_SIZE];
  };

-#define CEA_ESTACK_SIZE(st)					\
-	sizeof(((struct cea_exception_stacks *)0)->st## _stack)
-
  #define CEA_ESTACK_OFFS(st)					\
-	offsetof(struct cea_exception_stacks, st## _stack)
+	offsetof(struct cea_exception_stacks, event_stacks[st].stack)

  #define CEA_ESTACK_PAGES					\
  	(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 40f51e278171..93b10b264e53 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -70,9 +70,9 @@ struct estack_pages {

  #define EPAGERANGE(st)							\
  	[PFN_DOWN(CEA_ESTACK_OFFS(st)) ...				\
-	 PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = {	\
+	 PFN_DOWN(CEA_ESTACK_OFFS(st) + EXCEPTION_STKSZ - 1)] = {	\
  		.offs	= CEA_ESTACK_OFFS(st),				\
-		.size	= CEA_ESTACK_SIZE(st),				\
+		.size	= EXCEPTION_STKSZ,				\
  		.type	= STACK_TYPE_EXCEPTION + st, }

  /*
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 595c2e03ddd5..de0deb8b824c 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -157,7 +157,7 @@ static void __init percpu_setup_debug_store(unsigned int cpu)

  #define cea_map_stack(name) do {					\
  	npages = sizeof(estacks->name## _stack) / PAGE_SIZE;		\
-	cea_map_percpu_pages(cea->estacks.name## _stack,		\
+	cea_map_percpu_pages(cea->estacks.event_stacks[name].stack,	\
  			estacks->name## _stack, npages, PAGE_KERNEL);	\
  	} while (0)