FRED introduced new fields in the host-state area of the VMCS for
stack levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively
corresponding to the per-CPU exception stacks for #DB, NMI and #DF.
KVM must populate these fields each time a vCPU is loaded onto a CPU.
Convert the __this_cpu_ist_{top,bottom}_va() macros into real
functions and export __this_cpu_ist_top_va().
Suggested-by: Christoph Hellwig <hch@infradead.org>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Tested-by: Xuelian Guo <xuelian.guo@intel.com>
---
Changes in v5:
* Export an accessor instead of the data (Christoph Hellwig).
* Add Tested-by from Xuelian Guo.
Changes in v4:
* Rewrite the change log and add comments to the export (Dave Hansen).
---
arch/x86/coco/sev/sev-nmi.c | 4 ++--
arch/x86/coco/sev/vc-handle.c | 2 +-
arch/x86/include/asm/cpu_entry_area.h | 17 ++++-------------
arch/x86/kernel/cpu/common.c | 10 +++++-----
arch/x86/kernel/fred.c | 6 +++---
arch/x86/kernel/traps.c | 2 +-
arch/x86/mm/cpu_entry_area.c | 21 +++++++++++++++++++++
arch/x86/mm/fault.c | 2 +-
8 files changed, 38 insertions(+), 26 deletions(-)
diff --git a/arch/x86/coco/sev/sev-nmi.c b/arch/x86/coco/sev/sev-nmi.c
index d8dfaddfb367..73e34ad7a1a9 100644
--- a/arch/x86/coco/sev/sev-nmi.c
+++ b/arch/x86/coco/sev/sev-nmi.c
@@ -30,7 +30,7 @@ static __always_inline bool on_vc_stack(struct pt_regs *regs)
if (ip_within_syscall_gap(regs))
return false;
- return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
+ return ((sp >= __this_cpu_ist_bottom_va(ESTACK_VC)) && (sp < __this_cpu_ist_top_va(ESTACK_VC)));
}
/*
@@ -82,7 +82,7 @@ void noinstr __sev_es_ist_exit(void)
/* Read IST entry */
ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
- if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
+ if (WARN_ON(ist == __this_cpu_ist_top_va(ESTACK_VC)))
return;
/* Read back old IST entry and write it to the TSS */
diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c
index c3b4acbde0d8..88b6bc518a5a 100644
--- a/arch/x86/coco/sev/vc-handle.c
+++ b/arch/x86/coco/sev/vc-handle.c
@@ -859,7 +859,7 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
static __always_inline bool is_vc2_stack(unsigned long sp)
{
- return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
+ return (sp >= __this_cpu_ist_bottom_va(ESTACK_VC2) && sp < __this_cpu_ist_top_va(ESTACK_VC2));
}
static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 462fc34f1317..8e17f0ca74e6 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -46,7 +46,7 @@ struct cea_exception_stacks {
* The exception stack ordering in [cea_]exception_stacks
*/
enum exception_stack_ordering {
- ESTACK_DF,
+ ESTACK_DF = 0,
ESTACK_NMI,
ESTACK_DB,
ESTACK_MCE,
@@ -58,18 +58,15 @@ enum exception_stack_ordering {
#define CEA_ESTACK_SIZE(st) \
sizeof(((struct cea_exception_stacks *)0)->st## _stack)
-#define CEA_ESTACK_BOT(ceastp, st) \
- ((unsigned long)&(ceastp)->st## _stack)
-
-#define CEA_ESTACK_TOP(ceastp, st) \
- (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
-
#define CEA_ESTACK_OFFS(st) \
offsetof(struct cea_exception_stacks, st## _stack)
#define CEA_ESTACK_PAGES \
(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
+extern unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack);
+extern unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack);
+
#endif
#ifdef CONFIG_X86_32
@@ -144,10 +141,4 @@ static __always_inline struct entry_stack *cpu_entry_stack(int cpu)
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}
-#define __this_cpu_ist_top_va(name) \
- CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
-
-#define __this_cpu_ist_bottom_va(name) \
- CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name)
-
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 34a054181c4d..cb14919f92da 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2307,12 +2307,12 @@ static inline void setup_getcpu(int cpu)
static inline void tss_setup_ist(struct tss_struct *tss)
{
/* Set up the per-CPU TSS IST stacks */
- tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
- tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
- tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
- tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+ tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(ESTACK_DF);
+ tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(ESTACK_NMI);
+ tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(ESTACK_DB);
+ tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(ESTACK_MCE);
/* Only mapped when SEV-ES is active */
- tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
+ tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(ESTACK_VC);
}
#else /* CONFIG_X86_64 */
static inline void tss_setup_ist(struct tss_struct *tss) { }
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
index 816187da3a47..06d944a3d051 100644
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -87,7 +87,7 @@ void cpu_init_fred_rsps(void)
FRED_STKLVL(X86_TRAP_DF, FRED_DF_STACK_LEVEL));
/* The FRED equivalents to IST stacks... */
- wrmsrq(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
- wrmsrq(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
- wrmsrq(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
+ wrmsrq(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(ESTACK_DB));
+ wrmsrq(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(ESTACK_NMI));
+ wrmsrq(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(ESTACK_DF));
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 36354b470590..5c9c5ebf5e73 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -954,7 +954,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
info.type > STACK_TYPE_EXCEPTION_LAST)
- sp = __this_cpu_ist_top_va(VC2);
+ sp = __this_cpu_ist_top_va(ESTACK_VC2);
sync:
/*
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 575f863f3c75..eedaf103c8ad 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -18,6 +18,27 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
+/*
+ * FRED introduced new fields in the host-state area of the VMCS for
+ * stack levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively
+ * corresponding to per CPU stacks for #DB, NMI and #DF. KVM must
+ * populate these each time a vCPU is loaded onto a CPU.
+ *
+ * Called from entry code, so must be noinstr.
+ */
+noinstr unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack)
+{
+ unsigned long base = (unsigned long)&(__this_cpu_read(cea_exception_stacks)->DF_stack);
+ return base + EXCEPTION_STKSZ + stack * (EXCEPTION_STKSZ + PAGE_SIZE);
+}
+EXPORT_SYMBOL(__this_cpu_ist_top_va);
+
+noinstr unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack)
+{
+ unsigned long base = (unsigned long)&(__this_cpu_read(cea_exception_stacks)->DF_stack);
+ return base + stack * (EXCEPTION_STKSZ + PAGE_SIZE);
+}
+
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
static __always_inline unsigned int cea_offset(unsigned int cpu)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 998bd807fc7b..1804eb86cc14 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -671,7 +671,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
* and then double-fault, though, because we're likely to
* break the console driver and lose most of the stack dump.
*/
- call_on_stack(__this_cpu_ist_top_va(DF) - sizeof(void*),
+ call_on_stack(__this_cpu_ist_top_va(ESTACK_DF) - sizeof(void*),
handle_stack_overflow,
ASM_CALL_ARG3,
, [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info));
--
2.50.1
On 8/21/25 15:36, Xin Li (Intel) wrote: > FRED introduced new fields in the host-state area of the VMCS for > stack levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively > corresponding to per CPU exception stacks for #DB, NMI and #DF. > KVM must populate these each time a vCPU is loaded onto a CPU. > > Convert the __this_cpu_ist_{top,bottom}_va() macros into real > functions and export __this_cpu_ist_top_va(). > > Suggested-by: Christoph Hellwig <hch@infradead.org> > Suggested-by: Dave Hansen <dave.hansen@intel.com> Nit: I wouldn't use Suggested-by unless the person basically asked for the *entire* patch. Christoph and I were asking for specific bits of this, but neither of us asked for this patch as a whole. > diff --git a/arch/x86/coco/sev/sev-nmi.c b/arch/x86/coco/sev/sev-nmi.c > index d8dfaddfb367..73e34ad7a1a9 100644 > --- a/arch/x86/coco/sev/sev-nmi.c > +++ b/arch/x86/coco/sev/sev-nmi.c > @@ -30,7 +30,7 @@ static __always_inline bool on_vc_stack(struct pt_regs *regs) > if (ip_within_syscall_gap(regs)) > return false; > > - return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); > + return ((sp >= __this_cpu_ist_bottom_va(ESTACK_VC)) && (sp < __this_cpu_ist_top_va(ESTACK_VC))); > } This rename is one of those things that had me scratching my head for a minute. It wasn't obvious at _all_ why the VC=>ESTACK_VC "rename" is necessary. This needs to have been mentioned in the changelog. Better yet would have been to do this in a separate patch because a big chunk of this patch is just rename noise. 
> /* > @@ -82,7 +82,7 @@ void noinstr __sev_es_ist_exit(void) > /* Read IST entry */ > ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); > > - if (WARN_ON(ist == __this_cpu_ist_top_va(VC))) > + if (WARN_ON(ist == __this_cpu_ist_top_va(ESTACK_VC))) > return; > > /* Read back old IST entry and write it to the TSS */ > diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c > index c3b4acbde0d8..88b6bc518a5a 100644 > --- a/arch/x86/coco/sev/vc-handle.c > +++ b/arch/x86/coco/sev/vc-handle.c > @@ -859,7 +859,7 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, > > static __always_inline bool is_vc2_stack(unsigned long sp) > { > - return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); > + return (sp >= __this_cpu_ist_bottom_va(ESTACK_VC2) && sp < __this_cpu_ist_top_va(ESTACK_VC2)); > } > > static __always_inline bool vc_from_invalid_context(struct pt_regs *regs) > diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h > index 462fc34f1317..8e17f0ca74e6 100644 > --- a/arch/x86/include/asm/cpu_entry_area.h > +++ b/arch/x86/include/asm/cpu_entry_area.h > @@ -46,7 +46,7 @@ struct cea_exception_stacks { > * The exception stack ordering in [cea_]exception_stacks > */ > enum exception_stack_ordering { > - ESTACK_DF, > + ESTACK_DF = 0, > ESTACK_NMI, > ESTACK_DB, > ESTACK_MCE, Is this really required? I thought the first enum was always 0? Is this just trying to ensure that ESTACKS_MEMBERS() defines a matching number of N_EXCEPTION_STACKS stacks? If that's the case, shouldn't this be represented with a BUILD_BUG_ON()? 
> @@ -58,18 +58,15 @@ enum exception_stack_ordering { > #define CEA_ESTACK_SIZE(st) \ > sizeof(((struct cea_exception_stacks *)0)->st## _stack) > > -#define CEA_ESTACK_BOT(ceastp, st) \ > - ((unsigned long)&(ceastp)->st## _stack) > - > -#define CEA_ESTACK_TOP(ceastp, st) \ > - (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) > - > #define CEA_ESTACK_OFFS(st) \ > offsetof(struct cea_exception_stacks, st## _stack) > > #define CEA_ESTACK_PAGES \ > (sizeof(struct cea_exception_stacks) / PAGE_SIZE) > > +extern unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack); > +extern unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack); > + > #endif > > #ifdef CONFIG_X86_32 > @@ -144,10 +141,4 @@ static __always_inline struct entry_stack *cpu_entry_stack(int cpu) > return &get_cpu_entry_area(cpu)->entry_stack_page.stack; > } > > -#define __this_cpu_ist_top_va(name) \ > - CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) > - > -#define __this_cpu_ist_bottom_va(name) \ > - CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name) > - > #endif > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c > index 34a054181c4d..cb14919f92da 100644 > --- a/arch/x86/kernel/cpu/common.c > +++ b/arch/x86/kernel/cpu/common.c > @@ -2307,12 +2307,12 @@ static inline void setup_getcpu(int cpu) > static inline void tss_setup_ist(struct tss_struct *tss) > { > /* Set up the per-CPU TSS IST stacks */ > - tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); > - tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); > - tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); > - tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); > + tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(ESTACK_DF); > + tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(ESTACK_NMI); > + tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(ESTACK_DB); > + tss->x86_tss.ist[IST_INDEX_MCE] = 
__this_cpu_ist_top_va(ESTACK_MCE); If you respin this, please vertically align these. > +/* > + * FRED introduced new fields in the host-state area of the VMCS for > + * stack levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively > + * corresponding to per CPU stacks for #DB, NMI and #DF. KVM must > + * populate these each time a vCPU is loaded onto a CPU. > + * > + * Called from entry code, so must be noinstr. > + */ > +noinstr unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack) > +{ > + unsigned long base = (unsigned long)&(__this_cpu_read(cea_exception_stacks)->DF_stack); > + return base + EXCEPTION_STKSZ + stack * (EXCEPTION_STKSZ + PAGE_SIZE); > +} > +EXPORT_SYMBOL(__this_cpu_ist_top_va); > + > +noinstr unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack) > +{ > + unsigned long base = (unsigned long)&(__this_cpu_read(cea_exception_stacks)->DF_stack); > + return base + stack * (EXCEPTION_STKSZ + PAGE_SIZE); > +} These are basically treating 'struct exception_stacks' like an array. There's no type safety or anything here. It's just an open-coded array access. Also, starting with ->DF_stack is a bit goofy looking. It's not obvious (or enforced) that it is stack #0 or at the beginning of the structure. Shouldn't we be _trying_ to make this look like: struct cea_exception_stacks *s; s = __this_cpu_read(cea_exception_stacks); return &s[stack_nr].stack; ? Where 'cea_exception_stacks' is an actual array: struct cea_exception_stacks[N_EXCEPTION_STACKS]; which might need to be embedded in a larger structure to get the 'IST_top_guard' without wasting allocating space for an extra full stack. 
> static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset); > > static __always_inline unsigned int cea_offset(unsigned int cpu) > diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c > index 998bd807fc7b..1804eb86cc14 100644 > --- a/arch/x86/mm/fault.c > +++ b/arch/x86/mm/fault.c > @@ -671,7 +671,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code, > * and then double-fault, though, because we're likely to > * break the console driver and lose most of the stack dump. > */ > - call_on_stack(__this_cpu_ist_top_va(DF) - sizeof(void*), > + call_on_stack(__this_cpu_ist_top_va(ESTACK_DF) - sizeof(void*), > handle_stack_overflow, > ASM_CALL_ARG3, > , [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info));
>> Suggested-by: Christoph Hellwig <hch@infradead.org> >> Suggested-by: Dave Hansen <dave.hansen@intel.com> > > Nit: I wouldn't use Suggested-by unless the person basically asked for > the *entire* patch. Christoph and I were asking for specific bits of > this, but neither of us asked for this patch as a whole. I did it because the patch is almost rewritten to export accessors instead of raw data, IOW, the way of doing it is completely changed. But I will remove Suggested-by. > >> diff --git a/arch/x86/coco/sev/sev-nmi.c b/arch/x86/coco/sev/sev-nmi.c >> index d8dfaddfb367..73e34ad7a1a9 100644 >> --- a/arch/x86/coco/sev/sev-nmi.c >> +++ b/arch/x86/coco/sev/sev-nmi.c >> @@ -30,7 +30,7 @@ static __always_inline bool on_vc_stack(struct pt_regs *regs) >> if (ip_within_syscall_gap(regs)) >> return false; >> >> - return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); >> + return ((sp >= __this_cpu_ist_bottom_va(ESTACK_VC)) && (sp < __this_cpu_ist_top_va(ESTACK_VC))); >> } > > This rename is one of those things that had me scratching my head for a > minute. It wasn't obvious at _all_ why the VC=>ESTACK_VC "rename" is > necessary. > > This needs to have been mentioned in the changelog. > > Better yet would have been to do this in a separate patch because a big > chunk of this patch is just rename noise. Sure, will do. >> diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h >> index 462fc34f1317..8e17f0ca74e6 100644 >> --- a/arch/x86/include/asm/cpu_entry_area.h >> +++ b/arch/x86/include/asm/cpu_entry_area.h >> @@ -46,7 +46,7 @@ struct cea_exception_stacks { >> * The exception stack ordering in [cea_]exception_stacks >> */ >> enum exception_stack_ordering { >> - ESTACK_DF, >> + ESTACK_DF = 0, >> ESTACK_NMI, >> ESTACK_DB, >> ESTACK_MCE, > > Is this really required? I thought the first enum was always 0? 
Is this > just trying to ensure that ESTACKS_MEMBERS() defines a matching number > of N_EXCEPTION_STACKS stacks? > > If that's the case, shouldn't this be represented with a BUILD_BUG_ON()? Will do BUILD_BUG_ON(). > >> @@ -58,18 +58,15 @@ enum exception_stack_ordering { >> #define CEA_ESTACK_SIZE(st) \ >> sizeof(((struct cea_exception_stacks *)0)->st## _stack) >> >> -#define CEA_ESTACK_BOT(ceastp, st) \ >> - ((unsigned long)&(ceastp)->st## _stack) >> - >> -#define CEA_ESTACK_TOP(ceastp, st) \ >> - (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) >> - >> #define CEA_ESTACK_OFFS(st) \ >> offsetof(struct cea_exception_stacks, st## _stack) >> >> #define CEA_ESTACK_PAGES \ >> (sizeof(struct cea_exception_stacks) / PAGE_SIZE) >> >> +extern unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack); >> +extern unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack); >> + >> #endif >> >> #ifdef CONFIG_X86_32 >> @@ -144,10 +141,4 @@ static __always_inline struct entry_stack *cpu_entry_stack(int cpu) >> return &get_cpu_entry_area(cpu)->entry_stack_page.stack; >> } >> >> -#define __this_cpu_ist_top_va(name) \ >> - CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) >> - >> -#define __this_cpu_ist_bottom_va(name) \ >> - CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name) >> - >> #endif >> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c >> index 34a054181c4d..cb14919f92da 100644 >> --- a/arch/x86/kernel/cpu/common.c >> +++ b/arch/x86/kernel/cpu/common.c >> @@ -2307,12 +2307,12 @@ static inline void setup_getcpu(int cpu) >> static inline void tss_setup_ist(struct tss_struct *tss) >> { >> /* Set up the per-CPU TSS IST stacks */ >> - tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); >> - tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); >> - tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); >> - tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); >> + 
tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(ESTACK_DF); >> + tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(ESTACK_NMI); >> + tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(ESTACK_DB); >> + tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(ESTACK_MCE); > > If you respin this, please vertically align these. NP. > >> +/* >> + * FRED introduced new fields in the host-state area of the VMCS for >> + * stack levels 1->3 (HOST_IA32_FRED_RSP[123]), each respectively >> + * corresponding to per CPU stacks for #DB, NMI and #DF. KVM must >> + * populate these each time a vCPU is loaded onto a CPU. >> + * >> + * Called from entry code, so must be noinstr. >> + */ >> +noinstr unsigned long __this_cpu_ist_top_va(enum exception_stack_ordering stack) >> +{ >> + unsigned long base = (unsigned long)&(__this_cpu_read(cea_exception_stacks)->DF_stack); >> + return base + EXCEPTION_STKSZ + stack * (EXCEPTION_STKSZ + PAGE_SIZE); >> +} >> +EXPORT_SYMBOL(__this_cpu_ist_top_va); >> + >> +noinstr unsigned long __this_cpu_ist_bottom_va(enum exception_stack_ordering stack) >> +{ >> + unsigned long base = (unsigned long)&(__this_cpu_read(cea_exception_stacks)->DF_stack); >> + return base + stack * (EXCEPTION_STKSZ + PAGE_SIZE); >> +} > > These are basically treating 'struct exception_stacks' like an array. > There's no type safety or anything here. It's just an open-coded array > access. > > Also, starting with ->DF_stack is a bit goofy looking. It's not obvious > (or enforced) that it is stack #0 or at the beginning of the structure. > > Shouldn't we be _trying_ to make this look like: > > struct cea_exception_stacks *s; > s = __this_cpu_read(cea_exception_stacks); > > return &s[stack_nr].stack; > > ? > > Where 'cea_exception_stacks' is an actual array: > > struct cea_exception_stacks[N_EXCEPTION_STACKS]; > > which might need to be embedded in a larger structure to get the > 'IST_top_guard' without wasting allocating space for an extra full stack. 
> Good suggestion! Thanks! Xin
© 2016 - 2025 Red Hat, Inc.