[PATCH v2 2/5] x86/IDT: Make idt_tables[] be per_cpu(idt)

Posted by Andrew Cooper 4 days, 21 hours ago
This can be a plain per_cpu() variable, and __read_mostly seeing as it's
allocated once and never touched again.

This removes an NR_CPUS-sized array, and improves NUMA locality of access
for both the VT-x and SVM context switch paths.

No functional change.
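
For context, a condensed sketch of the pattern the patch converts to (all
identifiers are the ones used in the diff below; example() is a purely
hypothetical caller, not code from the tree):

    /* asm/idt.h: declaration visible to all users. */
    DECLARE_PER_CPU(idt_entry_t *, idt);

    /* traps-setup.c: single definition, read-mostly placement. */
    DEFINE_PER_CPU_READ_MOSTLY(idt_entry_t *, idt);

    /* Hypothetical caller: local CPU via this_cpu(), any CPU via per_cpu(). */
    static void example(unsigned int cpu)
    {
        idt_entry_t *local  = this_cpu(idt);
        idt_entry_t *remote = per_cpu(idt, cpu);

        enable_each_ist(local);    /* e.g. load_system_tables() */
        disable_each_ist(remote);  /* e.g. the SVM context switch path */
    }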

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/cpu/common.c      |  5 +++--
 xen/arch/x86/crash.c           |  8 ++++----
 xen/arch/x86/domain.c          |  2 +-
 xen/arch/x86/hvm/svm/svm.c     |  4 ++--
 xen/arch/x86/hvm/vmx/vmcs.c    |  2 +-
 xen/arch/x86/include/asm/idt.h |  3 ++-
 xen/arch/x86/machine_kexec.c   |  7 +++++--
 xen/arch/x86/smpboot.c         | 14 +++++++-------
 xen/arch/x86/traps-setup.c     |  2 ++
 xen/arch/x86/traps.c           |  5 +----
 10 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index e8b355ebcf36..e8d4ca3203be 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -819,6 +819,7 @@ void load_system_tables(void)
 	 * support using ARRAY_SIZE against per-cpu variables.
 	 */
 	struct tss_page *tss_page = &this_cpu(tss_page);
+	idt_entry_t *idt = this_cpu(idt);
 
 	/* The TSS may be live.	 Disuade any clever optimisations. */
 	volatile struct tss64 *tss = &tss_page->tss;
@@ -830,7 +831,7 @@ void load_system_tables(void)
 		.limit = LAST_RESERVED_GDT_BYTE,
 	};
 	const struct desc_ptr idtr = {
-		.base = (unsigned long)idt_tables[cpu],
+		.base = (unsigned long)idt,
 		.limit = sizeof(bsp_idt) - 1,
 	};
 
@@ -914,7 +915,7 @@ void load_system_tables(void)
 	ltr(TSS_SELECTOR);
 	lldt(0);
 
-	enable_each_ist(idt_tables[cpu]);
+	enable_each_ist(idt);
 
 	/*
 	 * Bottom-of-stack must be 16-byte aligned!
diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index 5f7d7b392a1f..1e4b0eeff21b 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -63,7 +63,7 @@ static int noreturn cf_check do_nmi_crash(
          * This update is safe from a security point of view, as this
          * pcpu is never going to try to sysret back to a PV vcpu.
          */
-        set_ist(&idt_tables[cpu][X86_EXC_MC], IST_NONE);
+        set_ist(&per_cpu(idt, cpu)[X86_EXC_MC], IST_NONE);
 
         kexec_crash_save_cpu();
         __stop_this_cpu();
@@ -120,6 +120,7 @@ static void nmi_shootdown_cpus(void)
 {
     unsigned long msecs;
     unsigned int cpu = smp_processor_id();
+    idt_entry_t *idt = this_cpu(idt);
 
     disable_lapic_nmi_watchdog();
     local_irq_disable();
@@ -133,9 +134,8 @@ static void nmi_shootdown_cpus(void)
      * Disable IST for MCEs to avoid stack corruption race conditions, and
      * change the NMI handler to a nop to avoid deviation from this codepath.
      */
-    _set_gate_lower(&idt_tables[cpu][X86_EXC_NMI],
-                    SYS_DESC_irq_gate, 0, &trap_nop);
-    set_ist(&idt_tables[cpu][X86_EXC_MC], IST_NONE);
+    _set_gate_lower(&idt[X86_EXC_NMI], SYS_DESC_irq_gate, 0, &trap_nop);
+    set_ist(&idt[X86_EXC_MC], IST_NONE);
 
     set_nmi_callback(do_nmi_crash);
     smp_send_nmi_allbutself();
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index d3db76833f3c..a42fa5480593 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,7 +116,7 @@ void play_dead(void)
     local_irq_disable();
 
     /* Change the NMI handler to a nop (see comment below). */
-    _set_gate_lower(&idt_tables[cpu][X86_EXC_NMI], SYS_DESC_irq_gate, 0,
+    _set_gate_lower(&this_cpu(idt)[X86_EXC_NMI], SYS_DESC_irq_gate, 0,
                     &trap_nop);
 
     /*
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index ea78da4f4210..4eac89964f61 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -915,7 +915,7 @@ static void cf_check svm_ctxt_switch_from(struct vcpu *v)
     svm_vmload_pa(per_cpu(host_vmcb, cpu));
 
     /* Resume use of ISTs now that the host TR is reinstated. */
-    enable_each_ist(idt_tables[cpu]);
+    enable_each_ist(per_cpu(idt, cpu));
 
     /*
      * Possibly clear previous guest selection of SSBD if set.  Note that
@@ -944,7 +944,7 @@ static void cf_check svm_ctxt_switch_to(struct vcpu *v)
      * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
      * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
      */
-    disable_each_ist(idt_tables[cpu]);
+    disable_each_ist(per_cpu(idt, cpu));
 
     svm_restore_dr(v);
 
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 20ab2d0f266f..e47a6e1542b7 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -917,7 +917,7 @@ static void vmx_set_host_env(struct vcpu *v)
 
     __vmwrite(HOST_GDTR_BASE,
               (unsigned long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY));
-    __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
+    __vmwrite(HOST_IDTR_BASE, (unsigned long)per_cpu(idt, cpu));
 
     __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
 
diff --git a/xen/arch/x86/include/asm/idt.h b/xen/arch/x86/include/asm/idt.h
index f00368f28c86..d795798d3eca 100644
--- a/xen/arch/x86/include/asm/idt.h
+++ b/xen/arch/x86/include/asm/idt.h
@@ -3,6 +3,7 @@
 #define X86_ASM_IDT_H
 
 #include <xen/bug.h>
+#include <xen/percpu.h>
 
 #include <asm/x86-defns.h>
 
@@ -29,7 +30,7 @@ typedef union {
 } idt_entry_t;
 
 extern idt_entry_t bsp_idt[X86_IDT_VECTORS];
-extern idt_entry_t *idt_tables[];
+DECLARE_PER_CPU(idt_entry_t *, idt);
 
 /*
  * Set the Interrupt Stack Table used by a particular IDT entry.  Typically
diff --git a/xen/arch/x86/machine_kexec.c b/xen/arch/x86/machine_kexec.c
index f775e526d59b..35fa5c82e9c2 100644
--- a/xen/arch/x86/machine_kexec.c
+++ b/xen/arch/x86/machine_kexec.c
@@ -170,9 +170,12 @@ void machine_kexec(struct kexec_image *image)
      */
     for ( i = 0; i < nr_cpu_ids; i++ )
     {
-        if ( idt_tables[i] == NULL )
+        idt_entry_t *idt = per_cpu(idt, i);
+
+        if ( !idt )
             continue;
-        _update_gate_addr_lower(&idt_tables[i][X86_EXC_MC], &trap_nop);
+
+        _update_gate_addr_lower(&idt[X86_EXC_MC], &trap_nop);
     }
 
     /* Reset CPUID masking and faulting to the host's default. */
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index dc65f9e45269..4e9f9ac4b2ee 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -863,7 +863,7 @@ int setup_cpu_root_pgt(unsigned int cpu)
         rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt);
 
     if ( !rc )
-        rc = clone_mapping(idt_tables[cpu], rpt);
+        rc = clone_mapping(per_cpu(idt, cpu), rpt);
     if ( !rc )
     {
         struct tss_page *ptr = &per_cpu(tss_page, cpu);
@@ -1009,7 +1009,7 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove)
     if ( remove )
     {
         FREE_XENHEAP_PAGE(per_cpu(gdt, cpu));
-        FREE_XENHEAP_PAGE(idt_tables[cpu]);
+        FREE_XENHEAP_PAGE(per_cpu(idt, cpu));
 
         if ( stack_base[cpu] )
         {
@@ -1076,12 +1076,12 @@ static int cpu_smpboot_alloc(unsigned int cpu)
     gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
 #endif
 
-    if ( idt_tables[cpu] == NULL )
-        idt_tables[cpu] = alloc_xenheap_pages(0, memflags);
-    if ( idt_tables[cpu] == NULL )
+    if ( per_cpu(idt, cpu) == NULL )
+        per_cpu(idt, cpu) = alloc_xenheap_pages(0, memflags);
+    if ( per_cpu(idt, cpu) == NULL )
         goto out;
-    memcpy(idt_tables[cpu], bsp_idt, sizeof(bsp_idt));
-    disable_each_ist(idt_tables[cpu]);
+    memcpy(per_cpu(idt, cpu), bsp_idt, sizeof(bsp_idt));
+    disable_each_ist(per_cpu(idt, cpu));
 
     for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
           i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
diff --git a/xen/arch/x86/traps-setup.c b/xen/arch/x86/traps-setup.c
index b172ea933607..ae600526cbe3 100644
--- a/xen/arch/x86/traps-setup.c
+++ b/xen/arch/x86/traps-setup.c
@@ -7,3 +7,5 @@
 
 idt_entry_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
     bsp_idt[X86_IDT_VECTORS];
+
+DEFINE_PER_CPU_READ_MOSTLY(idt_entry_t *, idt);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 7a68996b02f7..d52840848d30 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -98,9 +98,6 @@ DEFINE_PER_CPU_READ_MOSTLY(seg_desc_t *, compat_gdt);
 DEFINE_PER_CPU_READ_MOSTLY(l1_pgentry_t, compat_gdt_l1e);
 #endif
 
-/* Pointer to the IDT of every CPU. */
-idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
-
 /*
  * The TSS is smaller than a page, but we give it a full page to avoid
  * adjacent per-cpu data leaking via Meltdown when XPTI is in use.
@@ -1939,7 +1936,7 @@ void __init init_idt_traps(void)
     enable_each_ist(bsp_idt);
 
     /* CPU0 uses the master IDT. */
-    idt_tables[0] = bsp_idt;
+    this_cpu(idt) = bsp_idt;
 
     this_cpu(gdt) = boot_gdt;
     if ( IS_ENABLED(CONFIG_PV32) )
-- 
2.39.5


Re: [PATCH v2 2/5] x86/IDT: Make idt_tables[] be per_cpu(idt)
Posted by Jan Beulich 4 days, 7 hours ago
On 05.03.2025 01:02, Andrew Cooper wrote:
> This can be a plain per_cpu() variable, and __read_mostly seeing as it's
> allocated once and never touched again.

cpu_smpboot_free() certainly touches (really: modifies) it again. Just that ...

> @@ -1009,7 +1009,7 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove)
>      if ( remove )
>      {
>          FREE_XENHEAP_PAGE(per_cpu(gdt, cpu));
> -        FREE_XENHEAP_PAGE(idt_tables[cpu]);
> +        FREE_XENHEAP_PAGE(per_cpu(idt, cpu));

... this function-like macro hides that the macro argument is written to. (This
is the kind of hiding that elsewhere I think you dislike, yet these macros were
an idea of yours, iirc.)
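
For reference, the macro under discussion has roughly this shape (an
illustrative sketch, not Xen's exact definition); the relevant property is
that it NULLs the pointer it is handed:

    #define FREE_XENHEAP_PAGE(p) do {                              \
        free_xenheap_page(p);                                      \
        (p) = NULL;   /* the hidden write to per_cpu(idt, cpu) */  \
    } while ( 0 )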

Jan
Re: [PATCH v2 2/5] x86/IDT: Make idt_tables[] be per_cpu(idt)
Posted by Andrew Cooper 2 days, 20 hours ago
On 05/03/2025 2:01 pm, Jan Beulich wrote:
> On 05.03.2025 01:02, Andrew Cooper wrote:
>> This can be a plain per_cpu() variable, and __read_mostly seeing as it's
>> allocated once and never touched again.
> cpu_smpboot_free() certainly touches (really: modifies) it again.

Not really.  That's a dead path because we always park CPUs.

But fine, I'll rephrase to "not touched again during its lifetime".

~Andrew

Re: [PATCH v2 2/5] x86/IDT: Make idt_tables[] be per_cpu(idt)
Posted by Jan Beulich 2 days, 12 hours ago
On 07.03.2025 02:09, Andrew Cooper wrote:
> On 05/03/2025 2:01 pm, Jan Beulich wrote:
>> On 05.03.2025 01:02, Andrew Cooper wrote:
>>> This can be a plain per_cpu() variable, and __read_mostly seeing as it's
>>> allocated once and never touched again.
>> cpu_smpboot_free() certainly touches (really: modifies) it again.
> 
> Not really.  That's a dead path because we always park CPUs.

On Intel hardware; not (yet) on AMD / Hygon iirc.

Jan

> But fine, I'll rephrase to "not touched again during its lifetime".
> 
> ~Andrew