Introduce support for possibly using a different L4 for each idle vCPU.

This change only introduces support for loading a per-pCPU idle L4; even
with the per-CPU idle page-table enabled it is still a clone of
idle_pg_table, hence no functional change is expected.

Note the idle L4 is not modified after Xen has reached the
SYS_STATE_smp_boot state, hence there is no need to synchronize the
contents of the L4s once the CPUs are started.

Using a per-CPU idle page-table is not strictly required for the Address
Space Isolation work, as idle page-tables are never used when running
guests. However, it simplifies memory management of the per-CPU mappings,
as creating a per-CPU mapping only requires modifying the idle page-table
of the CPU where the mapping should be created.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
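Illustrative note, not part of the patch: a rough sketch of how a per-CPU
mapping could later be created by editing only the local CPU's idle L4.
The create_percpu_mapping() wrapper and map_pages_under_root() helper are
hypothetical and only show the intended flow; idle_vcpu[], cr3_pa(),
maddr_to_virt() and smp_processor_id() are the existing interfaces relied
upon.

    /* Hypothetical sketch: install a mapping visible only to this CPU. */
    static int create_percpu_mapping(unsigned long va, mfn_t mfn,
                                     unsigned int flags)
    {
        /* Root page-table (L4) private to the CPU performing the call. */
        root_pgentry_t *root =
            maddr_to_virt(cr3_pa(idle_vcpu[smp_processor_id()]->arch.cr3));

        /*
         * Populate the intermediate levels below 'root' and write the leaf
         * entry for 'va'.  As 'root' is per-CPU, the new L4 slot doesn't
         * need to be propagated to the page-tables of any other CPU.
         */
        return map_pages_under_root(root, va, mfn, 1, flags); /* assumed */
    }
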
 xen/arch/x86/boot/x86_64.S       | 11 +++++++++++
 xen/arch/x86/domain.c            | 20 +++++++++++++++++++-
 xen/arch/x86/domain_page.c       |  2 +-
 xen/arch/x86/include/asm/setup.h |  1 +
 xen/arch/x86/setup.c             |  3 +++
 xen/arch/x86/smpboot.c           |  7 +++++++
 6 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
index 04bb62ae8680..af7854820185 100644
--- a/xen/arch/x86/boot/x86_64.S
+++ b/xen/arch/x86/boot/x86_64.S
@@ -15,6 +15,17 @@ ENTRY(__high_start)
         mov     $XEN_MINIMAL_CR4,%rcx
         mov     %rcx,%cr4
 
+        /*
+         * Possibly switch to the per-CPU idle page-tables. Note we cannot
+         * switch earlier as the per-CPU page-tables might be above 4G, and
+         * hence need to load them from 64bit code.
+         */
+        mov     ap_cr3(%rip), %rax
+        test    %rax, %rax
+        jz      .L_skip_cr3
+        mov     %rax, %cr3
+.L_skip_cr3:
+
         mov     stack_start(%rip),%rsp
 
         /* Reset EFLAGS (subsumes CLI and CLD). */
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 9cfcf0dc63f3..b62c4311da6c 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -555,6 +555,7 @@ void arch_vcpu_regs_init(struct vcpu *v)
 int arch_vcpu_create(struct vcpu *v)
 {
     struct domain *d = v->domain;
+    root_pgentry_t *pgt = NULL;
     int rc;
 
     v->arch.flags = TF_kernel_mode;
@@ -589,7 +590,23 @@ int arch_vcpu_create(struct vcpu *v)
     else
     {
         /* Idle domain */
-        v->arch.cr3 = __pa(idle_pg_table);
+        if ( (opt_asi_pv || opt_asi_hvm) && v->vcpu_id )
+        {
+            pgt = alloc_xenheap_page();
+
+            /*
+             * For the idle vCPU 0 (the BSP idle vCPU) use idle_pg_table
+             * directly, there's no need to create yet another copy.
+             */
+            rc = -ENOMEM;
+            if ( !pgt )
+                goto fail;
+
+            copy_page(pgt, idle_pg_table);
+            v->arch.cr3 = __pa(pgt);
+        }
+        else
+            v->arch.cr3 = __pa(idle_pg_table);
         rc = 0;
         v->arch.msrs = ZERO_BLOCK_PTR; /* Catch stray misuses */
     }
@@ -611,6 +628,7 @@ int arch_vcpu_create(struct vcpu *v)
     vcpu_destroy_fpu(v);
     xfree(v->arch.msrs);
     v->arch.msrs = NULL;
+    free_xenheap_page(pgt);
 
     return rc;
 }
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
index eac5e3304fb8..99b78af90fd3 100644
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -51,7 +51,7 @@ static inline struct vcpu *mapcache_current_vcpu(void)
         if ( (v = idle_vcpu[smp_processor_id()]) == current )
             sync_local_execstate();
         /* We must now be running on the idle page table. */
-        ASSERT(cr3_pa(read_cr3()) == __pa(idle_pg_table));
+        ASSERT(cr3_pa(read_cr3()) == cr3_pa(v->arch.cr3));
     }
 
     return v;
diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
index d75589178b91..a8452fce8f05 100644
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -14,6 +14,7 @@ extern unsigned long xenheap_initial_phys_start;
extern uint64_t boot_tsc_stamp;
extern void *stack_start;
+extern unsigned long ap_cr3;
void early_cpu_init(bool verbose);
void early_time_init(void);
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index bc387d96b519..c5a13b30daf4 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -158,6 +158,9 @@ char asmlinkage __section(".init.bss.stack_aligned") __aligned(STACK_SIZE)
 
 /* Used by the BSP/AP paths to find the higher half stack mapping to use. */
 void *stack_start = cpu0_stack + STACK_SIZE - sizeof(struct cpu_info);
 
+/* cr3 value for the AP to load on boot. */
+unsigned long ap_cr3;
+
 /* Used by the boot asm to stash the relocated multiboot info pointer. */
 unsigned int asmlinkage __initdata multiboot_ptr;
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 8aa621533f3d..e07add36b1b6 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -581,6 +581,13 @@ static int do_boot_cpu(int apicid, int cpu)
 
     stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info);
 
+    /*
+     * If per-CPU idle root page table has been allocated, switch to it as
+     * part of the AP bringup trampoline.
+     */
+    ap_cr3 = idle_vcpu[cpu]->arch.cr3 != __pa(idle_pg_table) ?
+             idle_vcpu[cpu]->arch.cr3 : 0;
+
     /* This grunge runs the startup process for the targeted processor. */
 
     set_cpu_state(CPU_STATE_INIT);
--
2.45.2