This can be a plain per_cpu() variable, and __read_mostly, seeing as it's
allocated once and never touched again.
This removes an NR_CPUS-sized array of pointers, and improves NUMA locality
of access for both the VT-x and SVM context switch paths.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
xen/arch/x86/cpu/common.c | 5 +++--
xen/arch/x86/crash.c | 8 ++++----
xen/arch/x86/domain.c | 2 +-
xen/arch/x86/hvm/svm/svm.c | 4 ++--
xen/arch/x86/hvm/vmx/vmcs.c | 2 +-
xen/arch/x86/include/asm/idt.h | 3 ++-
xen/arch/x86/machine_kexec.c | 7 +++++--
xen/arch/x86/smpboot.c | 14 +++++++-------
xen/arch/x86/traps-init.c | 2 ++
xen/arch/x86/traps.c | 5 +----
10 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index e8b355ebcf36..b83dbc5dfbba 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -819,6 +819,7 @@ void load_system_tables(void)
* support using ARRAY_SIZE against per-cpu variables.
*/
struct tss_page *tss_page = &this_cpu(tss_page);
+ idt_entry_t *idt = this_cpu(idt);
/* The TSS may be live. Disuade any clever optimisations. */
volatile struct tss64 *tss = &tss_page->tss;
@@ -830,7 +831,7 @@ void load_system_tables(void)
.limit = LAST_RESERVED_GDT_BYTE,
};
const struct desc_ptr idtr = {
- .base = (unsigned long)idt_tables[cpu],
+ .base = (unsigned long)idt,
.limit = sizeof(bsp_idt) - 1,
};
@@ -914,7 +915,7 @@ void load_system_tables(void)
ltr(TSS_SELECTOR);
lldt(0);
- enable_each_ist(idt_tables[cpu]);
+ enable_each_ist(idt);
/*
* Bottom-of-stack must be 16-byte aligned!
diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index 5f7d7b392a1f..1e4b0eeff21b 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -63,7 +63,7 @@ static int noreturn cf_check do_nmi_crash(
* This update is safe from a security point of view, as this
* pcpu is never going to try to sysret back to a PV vcpu.
*/
- set_ist(&idt_tables[cpu][X86_EXC_MC], IST_NONE);
+ set_ist(&per_cpu(idt, cpu)[X86_EXC_MC], IST_NONE);
kexec_crash_save_cpu();
__stop_this_cpu();
@@ -120,6 +120,7 @@ static void nmi_shootdown_cpus(void)
{
unsigned long msecs;
unsigned int cpu = smp_processor_id();
+ idt_entry_t *idt = this_cpu(idt);
disable_lapic_nmi_watchdog();
local_irq_disable();
@@ -133,9 +134,8 @@ static void nmi_shootdown_cpus(void)
* Disable IST for MCEs to avoid stack corruption race conditions, and
* change the NMI handler to a nop to avoid deviation from this codepath.
*/
- _set_gate_lower(&idt_tables[cpu][X86_EXC_NMI],
- SYS_DESC_irq_gate, 0, &trap_nop);
- set_ist(&idt_tables[cpu][X86_EXC_MC], IST_NONE);
+ _set_gate_lower(&idt[X86_EXC_NMI], SYS_DESC_irq_gate, 0, &trap_nop);
+ set_ist(&idt[X86_EXC_MC], IST_NONE);
set_nmi_callback(do_nmi_crash);
smp_send_nmi_allbutself();
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index d3db76833f3c..a42fa5480593 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -116,7 +116,7 @@ void play_dead(void)
local_irq_disable();
/* Change the NMI handler to a nop (see comment below). */
- _set_gate_lower(&idt_tables[cpu][X86_EXC_NMI], SYS_DESC_irq_gate, 0,
+ _set_gate_lower(&this_cpu(idt)[X86_EXC_NMI], SYS_DESC_irq_gate, 0,
&trap_nop);
/*
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index ea78da4f4210..4eac89964f61 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -915,7 +915,7 @@ static void cf_check svm_ctxt_switch_from(struct vcpu *v)
svm_vmload_pa(per_cpu(host_vmcb, cpu));
/* Resume use of ISTs now that the host TR is reinstated. */
- enable_each_ist(idt_tables[cpu]);
+ enable_each_ist(per_cpu(idt, cpu));
/*
* Possibly clear previous guest selection of SSBD if set. Note that
@@ -944,7 +944,7 @@ static void cf_check svm_ctxt_switch_to(struct vcpu *v)
* Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
* But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
*/
- disable_each_ist(idt_tables[cpu]);
+ disable_each_ist(per_cpu(idt, cpu));
svm_restore_dr(v);
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 20ab2d0f266f..e47a6e1542b7 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -917,7 +917,7 @@ static void vmx_set_host_env(struct vcpu *v)
__vmwrite(HOST_GDTR_BASE,
(unsigned long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY));
- __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
+ __vmwrite(HOST_IDTR_BASE, (unsigned long)per_cpu(idt, cpu));
__vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
diff --git a/xen/arch/x86/include/asm/idt.h b/xen/arch/x86/include/asm/idt.h
index 29d1a7dfbc63..3e3acdfa7930 100644
--- a/xen/arch/x86/include/asm/idt.h
+++ b/xen/arch/x86/include/asm/idt.h
@@ -3,6 +3,7 @@
#define X86_ASM_IDT_H
#include <xen/bug.h>
+#include <xen/percpu.h>
#include <xen/types.h>
#include <asm/x86-defns.h>
@@ -30,7 +31,7 @@ typedef union {
} idt_entry_t;
extern idt_entry_t bsp_idt[X86_IDT_VECTORS];
-extern idt_entry_t *idt_tables[];
+DECLARE_PER_CPU(idt_entry_t *, idt);
/*
* Set the Interrupt Stack Table used by a particular IDT entry. Typically
diff --git a/xen/arch/x86/machine_kexec.c b/xen/arch/x86/machine_kexec.c
index f775e526d59b..35fa5c82e9c2 100644
--- a/xen/arch/x86/machine_kexec.c
+++ b/xen/arch/x86/machine_kexec.c
@@ -170,9 +170,12 @@ void machine_kexec(struct kexec_image *image)
*/
for ( i = 0; i < nr_cpu_ids; i++ )
{
- if ( idt_tables[i] == NULL )
+ idt_entry_t *idt = per_cpu(idt, i);
+
+ if ( !idt )
continue;
- _update_gate_addr_lower(&idt_tables[i][X86_EXC_MC], &trap_nop);
+
+ _update_gate_addr_lower(&idt[X86_EXC_MC], &trap_nop);
}
/* Reset CPUID masking and faulting to the host's default. */
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index dc65f9e45269..4e9f9ac4b2ee 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -863,7 +863,7 @@ int setup_cpu_root_pgt(unsigned int cpu)
rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt);
if ( !rc )
- rc = clone_mapping(idt_tables[cpu], rpt);
+ rc = clone_mapping(per_cpu(idt, cpu), rpt);
if ( !rc )
{
struct tss_page *ptr = &per_cpu(tss_page, cpu);
@@ -1009,7 +1009,7 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove)
if ( remove )
{
FREE_XENHEAP_PAGE(per_cpu(gdt, cpu));
- FREE_XENHEAP_PAGE(idt_tables[cpu]);
+ FREE_XENHEAP_PAGE(per_cpu(idt, cpu));
if ( stack_base[cpu] )
{
@@ -1076,12 +1076,12 @@ static int cpu_smpboot_alloc(unsigned int cpu)
gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
#endif
- if ( idt_tables[cpu] == NULL )
- idt_tables[cpu] = alloc_xenheap_pages(0, memflags);
- if ( idt_tables[cpu] == NULL )
+ if ( per_cpu(idt, cpu) == NULL )
+ per_cpu(idt, cpu) = alloc_xenheap_pages(0, memflags);
+ if ( per_cpu(idt, cpu) == NULL )
goto out;
- memcpy(idt_tables[cpu], bsp_idt, sizeof(bsp_idt));
- disable_each_ist(idt_tables[cpu]);
+ memcpy(per_cpu(idt, cpu), bsp_idt, sizeof(bsp_idt));
+ disable_each_ist(per_cpu(idt, cpu));
for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
diff --git a/xen/arch/x86/traps-init.c b/xen/arch/x86/traps-init.c
index b172ea933607..ae600526cbe3 100644
--- a/xen/arch/x86/traps-init.c
+++ b/xen/arch/x86/traps-init.c
@@ -7,3 +7,5 @@
idt_entry_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
bsp_idt[X86_IDT_VECTORS];
+
+DEFINE_PER_CPU_READ_MOSTLY(idt_entry_t *, idt);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index f7965b3ffa50..aa3ed658def6 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -102,9 +102,6 @@ DEFINE_PER_CPU_READ_MOSTLY(seg_desc_t *, compat_gdt);
DEFINE_PER_CPU_READ_MOSTLY(l1_pgentry_t, compat_gdt_l1e);
#endif
-/* Pointer to the IDT of every CPU. */
-idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
-
/*
* The TSS is smaller than a page, but we give it a full page to avoid
* adjacent per-cpu data leaking via Meltdown when XPTI is in use.
@@ -2149,7 +2146,7 @@ void __init init_idt_traps(void)
enable_each_ist(bsp_idt);
/* CPU0 uses the master IDT. */
- idt_tables[0] = bsp_idt;
+ this_cpu(idt) = bsp_idt;
this_cpu(gdt) = boot_gdt;
if ( IS_ENABLED(CONFIG_PV32) )
--
2.39.5
On 24.02.2025 17:05, Andrew Cooper wrote:
> --- a/xen/arch/x86/cpu/common.c
> +++ b/xen/arch/x86/cpu/common.c
> @@ -819,6 +819,7 @@ void load_system_tables(void)
> * support using ARRAY_SIZE against per-cpu variables.
> */
> struct tss_page *tss_page = &this_cpu(tss_page);
> + idt_entry_t *idt = this_cpu(idt);
Nit: Tab indentation here.
> @@ -830,7 +831,7 @@ void load_system_tables(void)
> .limit = LAST_RESERVED_GDT_BYTE,
> };
> const struct desc_ptr idtr = {
> - .base = (unsigned long)idt_tables[cpu],
> + .base = (unsigned long)idt,
> .limit = sizeof(bsp_idt) - 1,
> };
Coming back to the comment on the earlier patch: Now that you touch all
of the idt_tables[] uses anyway, ...
> @@ -30,7 +31,7 @@ typedef union {
> } idt_entry_t;
>
> extern idt_entry_t bsp_idt[X86_IDT_VECTORS];
> -extern idt_entry_t *idt_tables[];
> +DECLARE_PER_CPU(idt_entry_t *, idt);
... this probably really ought to become
DECLARE_PER_CPU(idt_entry_t (*)[X86_IDT_VECTORS], idt);
?
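The benefit would be that the element count stays in the type, so sizes
fall out of sizeof rather than being repeated at use sites. A minimal
sketch (use sites would need adjusting for the extra indirection):

    idt_entry_t (*idt)[X86_IDT_VECTORS] = this_cpu(idt);

    /* Whole-table copy; the destination type carries the size. */
    memcpy(idt, bsp_idt, sizeof(*idt));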
Jan
On 25/02/2025 9:07 am, Jan Beulich wrote:
> On 24.02.2025 17:05, Andrew Cooper wrote:
>> --- a/xen/arch/x86/cpu/common.c
>> +++ b/xen/arch/x86/cpu/common.c
>> @@ -819,6 +819,7 @@ void load_system_tables(void)
>> * support using ARRAY_SIZE against per-cpu variables.
>> */
>> struct tss_page *tss_page = &this_cpu(tss_page);
>> + idt_entry_t *idt = this_cpu(idt);
> Nit: Tab indentation here.
Yeah, I noticed that only after sending the email. Other parts of the
FRED series vastly change this function.
>
>> @@ -830,7 +831,7 @@ void load_system_tables(void)
>> .limit = LAST_RESERVED_GDT_BYTE,
>> };
>> const struct desc_ptr idtr = {
>> - .base = (unsigned long)idt_tables[cpu],
>> + .base = (unsigned long)idt,
>> .limit = sizeof(bsp_idt) - 1,
>> };
> Coming back to the comment on the earlier patch: Now that you touch all
> of the idt_tables[] uses anyway, ...
>
>> @@ -30,7 +31,7 @@ typedef union {
>> } idt_entry_t;
>>
>> extern idt_entry_t bsp_idt[X86_IDT_VECTORS];
>> -extern idt_entry_t *idt_tables[];
>> +DECLARE_PER_CPU(idt_entry_t *, idt);
> ... this probably really ought to become
>
> DECLARE_PER_CPU(idt_entry_t (*)[X86_IDT_VECTORS], idt);
>
> ?
I'm afraid this doesn't compile.
arch/x86/crash.c:66:17: error: passing argument 1 of ‘set_ist’ from
incompatible pointer type [-Werror=incompatible-pointer-types]
...
note: expected ‘volatile idt_entry_t *’ but argument is of type
‘idt_entry_t (*)[256]’
Similarly {en,dis}able_each_ist() and _set_gate_lower().
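To spell out my reading of the failure: with the variable typed
idt_entry_t (*)[256], plain indexing steps in units of whole 256-entry
tables, so e.g. the existing crash.c site

    set_ist(&per_cpu(idt, cpu)[X86_EXC_MC], IST_NONE);

now passes an idt_entry_t (*)[256] instead of an idt_entry_t *.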
~Andrew
On 25.02.2025 16:40, Andrew Cooper wrote:
> On 25/02/2025 9:07 am, Jan Beulich wrote:
>> On 24.02.2025 17:05, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/cpu/common.c
>>> +++ b/xen/arch/x86/cpu/common.c
>>> @@ -819,6 +819,7 @@ void load_system_tables(void)
>>> * support using ARRAY_SIZE against per-cpu variables.
>>> */
>>> struct tss_page *tss_page = &this_cpu(tss_page);
>>> + idt_entry_t *idt = this_cpu(idt);
>> Nit: Tab indentation here.
>
> Yeah, I noticed that only after sending the email. Other parts of the
> FRED series vastly change this function.
>
>>
>>> @@ -830,7 +831,7 @@ void load_system_tables(void)
>>> .limit = LAST_RESERVED_GDT_BYTE,
>>> };
>>> const struct desc_ptr idtr = {
>>> - .base = (unsigned long)idt_tables[cpu],
>>> + .base = (unsigned long)idt,
>>> .limit = sizeof(bsp_idt) - 1,
>>> };
>> Coming back to the comment on the earlier patch: Now that you touch all
>> of the idt_tables[] uses anyway, ...
>>
>>> @@ -30,7 +31,7 @@ typedef union {
>>> } idt_entry_t;
>>>
>>> extern idt_entry_t bsp_idt[X86_IDT_VECTORS];
>>> -extern idt_entry_t *idt_tables[];
>>> +DECLARE_PER_CPU(idt_entry_t *, idt);
>> ... this probably really ought to become
>>
>> DECLARE_PER_CPU(idt_entry_t (*)[X86_IDT_VECTORS], idt);
>>
>> ?
>
> I'm afraid this doesn't compile.
>
> arch/x86/crash.c:66:17: error: passing argument 1 of ‘set_ist’ from
> incompatible pointer type [-Werror=incompatible-pointer-types]
> ...
> note: expected ‘volatile idt_entry_t *’ but argument is of type
> ‘idt_entry_t (*)[256]’
>
> Similarly {en,dis}able_each_ist() and _set_gate_lower().
Well, did you adjust the use sites? As said in the respective comment on
patch 4, that'll be necessary (to account for the abstract extra level of
indirection; generated code ought to not change).
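Concretely, taking the crash.c site as the example, the adjustment would
be an extra deref:

    set_ist(&(*per_cpu(idt, cpu))[X86_EXC_MC], IST_NONE);

which names the same address as before, so the emitted code ought to be
identical. (A sketch; the other call sites would follow the same pattern.)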
Jan
On 25/02/2025 4:33 pm, Jan Beulich wrote:
> On 25.02.2025 16:40, Andrew Cooper wrote:
>> On 25/02/2025 9:07 am, Jan Beulich wrote:
>>> On 24.02.2025 17:05, Andrew Cooper wrote:
>>>> --- a/xen/arch/x86/cpu/common.c
>>>> +++ b/xen/arch/x86/cpu/common.c
>>>> @@ -819,6 +819,7 @@ void load_system_tables(void)
>>>> * support using ARRAY_SIZE against per-cpu variables.
>>>> */
>>>> struct tss_page *tss_page = &this_cpu(tss_page);
>>>> + idt_entry_t *idt = this_cpu(idt);
>>> Nit: Tab indentation here.
>> Yeah, I noticed that only after sending the email. Other parts of the
>> FRED series vastly changes this function.
>>
>>>> @@ -830,7 +831,7 @@ void load_system_tables(void)
>>>> .limit = LAST_RESERVED_GDT_BYTE,
>>>> };
>>>> const struct desc_ptr idtr = {
>>>> - .base = (unsigned long)idt_tables[cpu],
>>>> + .base = (unsigned long)idt,
>>>> .limit = sizeof(bsp_idt) - 1,
>>>> };
>>> Coming back to the comment on the earlier patch: Now that you touch all
>>> of the idt_tables[] uses anyway, ...
>>>
>>>> @@ -30,7 +31,7 @@ typedef union {
>>>> } idt_entry_t;
>>>>
>>>> extern idt_entry_t bsp_idt[X86_IDT_VECTORS];
>>>> -extern idt_entry_t *idt_tables[];
>>>> +DECLARE_PER_CPU(idt_entry_t *, idt);
>>> ... this probably really ought to become
>>>
>>> DECLARE_PER_CPU(idt_entry_t (*)[X86_IDT_VECTORS], idt);
>>>
>>> ?
>> I'm afraid this doesn't compile.
>>
>> arch/x86/crash.c:66:17: error: passing argument 1 of ‘set_ist’ from
>> incompatible pointer type [-Werror=incompatible-pointer-types]
>> ...
>> note: expected ‘volatile idt_entry_t *’ but argument is of type
>> ‘idt_entry_t (*)[256]’
>>
>> Similarly {en,dis}able_each_ist() and _set_gate_lower().
> Well, did you adjust the use sites? As said in the respective comment on
> patch 4, that'll be necessary (to account for the abstract extra level of
> indirection; generated code ought to not change).
Having spent even more time on this, I'm giving up and rejecting the
suggestion. Some use sites can be made to compile; some I can't make
compile at all, and the result is a good demonstration of why exotic
types like this don't see common use.
If you want to try once this series is in, then feel free, but the
result is not pretty.
~Andrew