From: Hongyan Xia <hongyxia@amazon.com>
Create a per-domain mapping of PV guest_root_pt as direct map is being
removed.
Note that we do not map and unmap root_pgt for now since it is still a
xenheap page.
Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
Signed-off-by: Julien Grall <jgrall@amazon.com>
Signed-off-by: Elias El Yandouzi <eliasely@amazon.com>
----
Changes in V3:
* Rename SHADOW_ROOT
* Haven't addressed the potentially over-allocation issue as I don't get it
Changes in V2:
* Rework the shadow perdomain mapping solution in the follow-up patches
Changes since Hongyan's version:
* Remove the final dot in the commit title
diff --git a/xen/arch/x86/include/asm/config.h b/xen/arch/x86/include/asm/config.h
index ab7288cb36..5d710384df 100644
--- a/xen/arch/x86/include/asm/config.h
+++ b/xen/arch/x86/include/asm/config.h
@@ -203,7 +203,7 @@ extern unsigned char boot_edid_info[128];
/* Slot 260: per-domain mappings (including map cache). */
#define PERDOMAIN_VIRT_START (PML4_ADDR(260))
#define PERDOMAIN_SLOT_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
-#define PERDOMAIN_SLOTS 3
+#define PERDOMAIN_SLOTS 4
#define PERDOMAIN_VIRT_SLOT(s) (PERDOMAIN_VIRT_START + (s) * \
(PERDOMAIN_SLOT_MBYTES << 20))
/* Slot 4: mirror of per-domain mappings (for compat xlat area accesses). */
@@ -317,6 +317,14 @@ extern unsigned long xen_phys_start;
#define ARG_XLAT_START(v) \
(ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
+/* pv_root_pt mapping area. The fourth per-domain-mapping sub-area */
+#define PV_ROOT_PT_MAPPING_VIRT_START PERDOMAIN_VIRT_SLOT(3)
+#define PV_ROOT_PT_MAPPING_ENTRIES MAX_VIRT_CPUS
+
+/* The address of a particular VCPU's PV_ROOT_PT */
+#define PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v) \
+ (PV_ROOT_PT_MAPPING_VIRT_START + ((v)->vcpu_id * PAGE_SIZE))
+
#define ELFSIZE 64
#define ARCH_CRASH_SAVE_VMCOREINFO
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index f5daeb182b..8a97530607 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -272,6 +272,7 @@ struct time_scale {
struct pv_domain
{
l1_pgentry_t **gdt_ldt_l1tab;
+ l1_pgentry_t **root_pt_l1tab;
atomic_t nr_l4_pages;
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index d968bbbc73..efdf20f775 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -505,6 +505,13 @@ void share_xen_page_with_guest(struct page_info *page, struct domain *d,
nrspin_unlock(&d->page_alloc_lock);
}
+#define pv_root_pt_idx(v) \
+ ((v)->vcpu_id >> PAGETABLE_ORDER)
+
+#define pv_root_pt_pte(v) \
+ ((v)->domain->arch.pv.root_pt_l1tab[pv_root_pt_idx(v)] + \
+ ((v)->vcpu_id & (L1_PAGETABLE_ENTRIES - 1)))
+
void make_cr3(struct vcpu *v, mfn_t mfn)
{
struct domain *d = v->domain;
@@ -524,6 +531,13 @@ void write_ptbase(struct vcpu *v)
if ( is_pv_vcpu(v) && v->domain->arch.pv.xpti )
{
+ mfn_t guest_root_pt = _mfn(MASK_EXTR(v->arch.cr3, PAGE_MASK));
+ l1_pgentry_t *pte = pv_root_pt_pte(v);
+
+ ASSERT(v == current);
+
+ l1e_write(pte, l1e_from_mfn(guest_root_pt, __PAGE_HYPERVISOR_RO));
+
cpu_info->root_pgt_changed = true;
cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
if ( new_cr4 & X86_CR4_PCIDE )
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 2a445bb17b..1b025986f7 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -288,6 +288,21 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v)
1U << GDT_LDT_VCPU_SHIFT);
}
+static int pv_create_root_pt_l1tab(struct vcpu *v)
+{
+ return create_perdomain_mapping(v->domain,
+ PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v),
+ 1, v->domain->arch.pv.root_pt_l1tab,
+ NULL);
+}
+
+static void pv_destroy_root_pt_l1tab(struct vcpu *v)
+
+{
+ destroy_perdomain_mapping(v->domain,
+ PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), 1);
+}
+
void pv_vcpu_destroy(struct vcpu *v)
{
if ( is_pv_32bit_vcpu(v) )
@@ -297,6 +312,7 @@ void pv_vcpu_destroy(struct vcpu *v)
}
pv_destroy_gdt_ldt_l1tab(v);
+ pv_destroy_root_pt_l1tab(v);
XFREE(v->arch.pv.trap_ctxt);
}
@@ -311,6 +327,13 @@ int pv_vcpu_initialise(struct vcpu *v)
if ( rc )
return rc;
+ if ( v->domain->arch.pv.xpti )
+ {
+ rc = pv_create_root_pt_l1tab(v);
+ if ( rc )
+ goto done;
+ }
+
BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) >
PAGE_SIZE);
v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS);
@@ -346,10 +369,12 @@ void pv_domain_destroy(struct domain *d)
destroy_perdomain_mapping(d, GDT_LDT_VIRT_START,
GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+ destroy_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START, PV_ROOT_PT_MAPPING_ENTRIES);
XFREE(d->arch.pv.cpuidmasks);
FREE_XENHEAP_PAGE(d->arch.pv.gdt_ldt_l1tab);
+ FREE_XENHEAP_PAGE(d->arch.pv.root_pt_l1tab);
}
void noreturn cf_check continue_pv_domain(void);
@@ -371,6 +396,12 @@ int pv_domain_initialise(struct domain *d)
goto fail;
clear_page(d->arch.pv.gdt_ldt_l1tab);
+ d->arch.pv.root_pt_l1tab =
+ alloc_xenheap_pages(0, MEMF_node(domain_to_node(d)));
+ if ( !d->arch.pv.root_pt_l1tab )
+ goto fail;
+ clear_page(d->arch.pv.root_pt_l1tab);
+
if ( levelling_caps & ~LCAP_faulting &&
(d->arch.pv.cpuidmasks = xmemdup(&cpuidmask_defaults)) == NULL )
goto fail;
@@ -381,6 +412,11 @@ int pv_domain_initialise(struct domain *d)
if ( rc )
goto fail;
+ rc = create_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START,
+ PV_ROOT_PT_MAPPING_ENTRIES, NULL, NULL);
+ if ( rc )
+ goto fail;
+
d->arch.ctxt_switch = &pv_csw;
d->arch.pv.xpti = is_hardware_domain(d) ? opt_xpti_hwdom : opt_xpti_domu;
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 630bdc3945..c1ae5013af 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -80,6 +80,7 @@ void __dummy__(void)
#undef OFFSET_EF
+ OFFSET(VCPU_id, struct vcpu, vcpu_id);
OFFSET(VCPU_processor, struct vcpu, processor);
OFFSET(VCPU_domain, struct vcpu, domain);
OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index df015589ce..c1377da7a5 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -162,7 +162,15 @@ FUNC_LOCAL(restore_all_guest)
and %rsi, %rdi
and %r9, %rsi
add %rcx, %rdi
+
+ /*
+ * The address in the vCPU cr3 is always mapped in the per-domain
+ * pv_root_pt virt area.
+ */
+ imul $PAGE_SIZE, VCPU_id(%rbx), %esi
+ movabs $PV_ROOT_PT_MAPPING_VIRT_START, %rcx
add %rcx, %rsi
+
mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx
mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8
mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi)
--
2.40.1
On Mon, May 13, 2024 at 11:10:59AM +0000, Elias El Yandouzi wrote: > From: Hongyan Xia <hongyxia@amazon.com> > > Create a per-domain mapping of PV guest_root_pt as direct map is being > removed. > > Note that we do not map and unmap root_pgt for now since it is still a > xenheap page. I'm afraid this needs more context, at least for me to properly understand. I think I've figured out what create_perdomain_mapping() is supposed to do, and I have to admit the interface is very awkward. Anyway, attempted to provide some feedback. > > Signed-off-by: Hongyan Xia <hongyxia@amazon.com> > Signed-off-by: Julien Grall <jgrall@amazon.com> > Signed-off-by: Elias El Yandouzi <eliasely@amazon.com> > > ---- > Changes in V3: > * Rename SHADOW_ROOT > * Haven't addressed the potentially over-allocation issue as I don't get it > > Changes in V2: > * Rework the shadow perdomain mapping solution in the follow-up patches > > Changes since Hongyan's version: > * Remove the final dot in the commit title > > diff --git a/xen/arch/x86/include/asm/config.h b/xen/arch/x86/include/asm/config.h > index ab7288cb36..5d710384df 100644 > --- a/xen/arch/x86/include/asm/config.h > +++ b/xen/arch/x86/include/asm/config.h > @@ -203,7 +203,7 @@ extern unsigned char boot_edid_info[128]; > /* Slot 260: per-domain mappings (including map cache). */ > #define PERDOMAIN_VIRT_START (PML4_ADDR(260)) > #define PERDOMAIN_SLOT_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER)) > -#define PERDOMAIN_SLOTS 3 > +#define PERDOMAIN_SLOTS 4 > #define PERDOMAIN_VIRT_SLOT(s) (PERDOMAIN_VIRT_START + (s) * \ > (PERDOMAIN_SLOT_MBYTES << 20)) > /* Slot 4: mirror of per-domain mappings (for compat xlat area accesses). */ > @@ -317,6 +317,14 @@ extern unsigned long xen_phys_start; > #define ARG_XLAT_START(v) \ > (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT)) > > +/* pv_root_pt mapping area. The fourth per-domain-mapping sub-area */ > +#define PV_ROOT_PT_MAPPING_VIRT_START PERDOMAIN_VIRT_SLOT(3) > +#define PV_ROOT_PT_MAPPING_ENTRIES MAX_VIRT_CPUS > + > +/* The address of a particular VCPU's PV_ROOT_PT */ > +#define PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v) \ > + (PV_ROOT_PT_MAPPING_VIRT_START + ((v)->vcpu_id * PAGE_SIZE)) I know we are not there yet, but I wonder if we need to start having some non-shared per-cpu mapping area in the page-tables. Right now this is shared between all the vCPUs, as it's per-domain space (instead of per-vCPU). > + > #define ELFSIZE 64 > > #define ARCH_CRASH_SAVE_VMCOREINFO > diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h > index f5daeb182b..8a97530607 100644 > --- a/xen/arch/x86/include/asm/domain.h > +++ b/xen/arch/x86/include/asm/domain.h > @@ -272,6 +272,7 @@ struct time_scale { > struct pv_domain > { > l1_pgentry_t **gdt_ldt_l1tab; > + l1_pgentry_t **root_pt_l1tab; > > atomic_t nr_l4_pages; > > diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c > index d968bbbc73..efdf20f775 100644 > --- a/xen/arch/x86/mm.c > +++ b/xen/arch/x86/mm.c > @@ -505,6 +505,13 @@ void share_xen_page_with_guest(struct page_info *page, struct domain *d, > nrspin_unlock(&d->page_alloc_lock); > } > > +#define pv_root_pt_idx(v) \ > + ((v)->vcpu_id >> PAGETABLE_ORDER) > + > +#define pv_root_pt_pte(v) \ > + ((v)->domain->arch.pv.root_pt_l1tab[pv_root_pt_idx(v)] + \ > + ((v)->vcpu_id & (L1_PAGETABLE_ENTRIES - 1))) > + > void make_cr3(struct vcpu *v, mfn_t mfn) > { > struct domain *d = v->domain; > @@ -524,6 +531,13 @@ void write_ptbase(struct vcpu *v) > > if ( is_pv_vcpu(v) && v->domain->arch.pv.xpti ) > { > + mfn_t guest_root_pt = _mfn(MASK_EXTR(v->arch.cr3, PAGE_MASK)); > + l1_pgentry_t *pte = pv_root_pt_pte(v); > + > + ASSERT(v == current); > + > + l1e_write(pte, l1e_from_mfn(guest_root_pt, __PAGE_HYPERVISOR_RO)); > + > cpu_info->root_pgt_changed = true; > cpu_info->pv_cr3 = __pa(this_cpu(root_pgt)); > if ( new_cr4 & X86_CR4_PCIDE ) > diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c > index 2a445bb17b..1b025986f7 100644 > --- a/xen/arch/x86/pv/domain.c > +++ b/xen/arch/x86/pv/domain.c > @@ -288,6 +288,21 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v) > 1U << GDT_LDT_VCPU_SHIFT); > } > > +static int pv_create_root_pt_l1tab(struct vcpu *v) > +{ > + return create_perdomain_mapping(v->domain, > + PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), > + 1, v->domain->arch.pv.root_pt_l1tab, > + NULL); > +} > + > +static void pv_destroy_root_pt_l1tab(struct vcpu *v) The two 'v' parameters could be const here. > + > +{ > + destroy_perdomain_mapping(v->domain, > + PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), 1); > +} > + > void pv_vcpu_destroy(struct vcpu *v) > { > if ( is_pv_32bit_vcpu(v) ) > @@ -297,6 +312,7 @@ void pv_vcpu_destroy(struct vcpu *v) > } > > pv_destroy_gdt_ldt_l1tab(v); > + pv_destroy_root_pt_l1tab(v); > XFREE(v->arch.pv.trap_ctxt); > } > > @@ -311,6 +327,13 @@ int pv_vcpu_initialise(struct vcpu *v) > if ( rc ) > return rc; > > + if ( v->domain->arch.pv.xpti ) > + { > + rc = pv_create_root_pt_l1tab(v); > + if ( rc ) > + goto done; > + } > + > BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) > > PAGE_SIZE); > v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS); > @@ -346,10 +369,12 @@ void pv_domain_destroy(struct domain *d) > > destroy_perdomain_mapping(d, GDT_LDT_VIRT_START, > GDT_LDT_MBYTES << (20 - PAGE_SHIFT)); > + destroy_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START, PV_ROOT_PT_MAPPING_ENTRIES); Line too long. Also see comment below about using d->max_vcpus instead of MAX_VIRT_CPUS. > > XFREE(d->arch.pv.cpuidmasks); > > FREE_XENHEAP_PAGE(d->arch.pv.gdt_ldt_l1tab); > + FREE_XENHEAP_PAGE(d->arch.pv.root_pt_l1tab); > } > > void noreturn cf_check continue_pv_domain(void); > @@ -371,6 +396,12 @@ int pv_domain_initialise(struct domain *d) > goto fail; > clear_page(d->arch.pv.gdt_ldt_l1tab); > > + d->arch.pv.root_pt_l1tab = > + alloc_xenheap_pages(0, MEMF_node(domain_to_node(d))); > + if ( !d->arch.pv.root_pt_l1tab ) > + goto fail; > + clear_page(d->arch.pv.root_pt_l1tab); > + > if ( levelling_caps & ~LCAP_faulting && > (d->arch.pv.cpuidmasks = xmemdup(&cpuidmask_defaults)) == NULL ) > goto fail; > @@ -381,6 +412,11 @@ int pv_domain_initialise(struct domain *d) > if ( rc ) > goto fail; > > + rc = create_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START, > + PV_ROOT_PT_MAPPING_ENTRIES, NULL, NULL); Why not use d->max_vcpus here, instead of forcing up to MAX_VIRT_CPUS? By the time pv_domain_initialise() is called max_vcpus should already be initialized. AFAICT it doesn't make a difference, because for the call here only the L3 table is created, as last two parameters are NULL, but still is more accurate to use max_vcpus. > + if ( rc ) > + goto fail; > + > d->arch.ctxt_switch = &pv_csw; > > d->arch.pv.xpti = is_hardware_domain(d) ? opt_xpti_hwdom : opt_xpti_domu; > diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c > index 630bdc3945..c1ae5013af 100644 > --- a/xen/arch/x86/x86_64/asm-offsets.c > +++ b/xen/arch/x86/x86_64/asm-offsets.c > @@ -80,6 +80,7 @@ void __dummy__(void) > > #undef OFFSET_EF > > + OFFSET(VCPU_id, struct vcpu, vcpu_id); > OFFSET(VCPU_processor, struct vcpu, processor); > OFFSET(VCPU_domain, struct vcpu, domain); > OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map); > diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S > index df015589ce..c1377da7a5 100644 > --- a/xen/arch/x86/x86_64/entry.S > +++ b/xen/arch/x86/x86_64/entry.S > @@ -162,7 +162,15 @@ FUNC_LOCAL(restore_all_guest) > and %rsi, %rdi > and %r9, %rsi > add %rcx, %rdi > + > + /* > + * The address in the vCPU cr3 is always mapped in the per-domain > + * pv_root_pt virt area. > + */ > + imul $PAGE_SIZE, VCPU_id(%rbx), %esi Aren't some of the previous operations against %rsi now useless since it gets unconditionally overwritten here? and %r9, %rsi [...] add %rcx, %rsi Thanks, Roger.
Hi Roger, On 13/05/2024 16:27, Roger Pau Monné wrote: >> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c >> index 2a445bb17b..1b025986f7 100644 >> --- a/xen/arch/x86/pv/domain.c >> +++ b/xen/arch/x86/pv/domain.c >> @@ -288,6 +288,21 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v) >> 1U << GDT_LDT_VCPU_SHIFT); >> } >> >> +static int pv_create_root_pt_l1tab(struct vcpu *v) >> +{ >> + return create_perdomain_mapping(v->domain, >> + PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), >> + 1, v->domain->arch.pv.root_pt_l1tab, >> + NULL); >> +} >> + >> +static void pv_destroy_root_pt_l1tab(struct vcpu *v) > > The two 'v' parameters could be const here. I could constify the parameters but the functions wouldn't be consistent with the two above for gdt/ldt. >> @@ -381,6 +412,11 @@ int pv_domain_initialise(struct domain *d) >> if ( rc ) >> goto fail; >> >> + rc = create_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START, >> + PV_ROOT_PT_MAPPING_ENTRIES, NULL, NULL); > > Why not use d->max_vcpus here, instead of forcing up to MAX_VIRT_CPUS? > > By the time pv_domain_initialise() is called max_vcpus should already > be initialized. AFAICT it doesn't make a difference, because for the > call here only the L3 table is created, as last two parameters are > NULL, but still is more accurate to use max_vcpus. There is no particular reason. I think we can safely use d->max_vcpus. >> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S >> index df015589ce..c1377da7a5 100644 >> --- a/xen/arch/x86/x86_64/entry.S >> +++ b/xen/arch/x86/x86_64/entry.S >> @@ -162,7 +162,15 @@ FUNC_LOCAL(restore_all_guest) >> and %rsi, %rdi >> and %r9, %rsi >> add %rcx, %rdi >> + >> + /* >> + * The address in the vCPU cr3 is always mapped in the per-domain >> + * pv_root_pt virt area. >> + */ >> + imul $PAGE_SIZE, VCPU_id(%rbx), %esi > > Aren't some of the previous operations against %rsi now useless since > it gets unconditionally overwritten here? I think I can just get rid off of: and %r9, %rsi > and %r9, %rsi > [...] > add %rcx, %rsi The second operation you suggested is actually used to retrieve the VA of the PV_ROOT_PT. Cheers, Elias
On Tue, May 14, 2024 at 06:15:57PM +0100, Elias El Yandouzi wrote: > Hi Roger, > > On 13/05/2024 16:27, Roger Pau Monné wrote: > > > diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c > > > index 2a445bb17b..1b025986f7 100644 > > > --- a/xen/arch/x86/pv/domain.c > > > +++ b/xen/arch/x86/pv/domain.c > > > @@ -288,6 +288,21 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v) > > > 1U << GDT_LDT_VCPU_SHIFT); > > > } > > > +static int pv_create_root_pt_l1tab(struct vcpu *v) > > > +{ > > > + return create_perdomain_mapping(v->domain, > > > + PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), > > > + 1, v->domain->arch.pv.root_pt_l1tab, > > > + NULL); > > > +} > > > + > > > +static void pv_destroy_root_pt_l1tab(struct vcpu *v) > > > > The two 'v' parameters could be const here. > > I could constify the parameters but the functions wouldn't be consistent > with the two above for gdt/ldt. The fact they are not const for the other helpers would also need fixing at some point IMO, it's best if those are already using the correct type. > > > diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S > > > index df015589ce..c1377da7a5 100644 > > > --- a/xen/arch/x86/x86_64/entry.S > > > +++ b/xen/arch/x86/x86_64/entry.S > > > @@ -162,7 +162,15 @@ FUNC_LOCAL(restore_all_guest) > > > and %rsi, %rdi > > > and %r9, %rsi > > > add %rcx, %rdi > > > + > > > + /* > > > + * The address in the vCPU cr3 is always mapped in the per-domain > > > + * pv_root_pt virt area. > > > + */ > > > + imul $PAGE_SIZE, VCPU_id(%rbx), %esi > > > > Aren't some of the previous operations against %rsi now useless since > > it gets unconditionally overwritten here? > > I think I can just get rid off of: > > and %r9, %rsi > > > and %r9, %rsi > > [...] > > add %rcx, %rsi > > The second operation you suggested is actually used to retrieve the VA of > the PV_ROOT_PT. Oh, yes, sorry, got confused when looking at the source file together with the diff, it's only the `and` that can be removed. Thanks, Roger.
On 13.05.2024 17:27, Roger Pau Monné wrote: > On Mon, May 13, 2024 at 11:10:59AM +0000, Elias El Yandouzi wrote: >> @@ -317,6 +317,14 @@ extern unsigned long xen_phys_start; >> #define ARG_XLAT_START(v) \ >> (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT)) >> >> +/* pv_root_pt mapping area. The fourth per-domain-mapping sub-area */ >> +#define PV_ROOT_PT_MAPPING_VIRT_START PERDOMAIN_VIRT_SLOT(3) >> +#define PV_ROOT_PT_MAPPING_ENTRIES MAX_VIRT_CPUS >> + >> +/* The address of a particular VCPU's PV_ROOT_PT */ >> +#define PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v) \ >> + (PV_ROOT_PT_MAPPING_VIRT_START + ((v)->vcpu_id * PAGE_SIZE)) > > I know we are not there yet, but I wonder if we need to start having > some non-shared per-cpu mapping area in the page-tables. Right now > this is shared between all the vCPUs, as it's per-domain space > (instead of per-vCPU). In turn requiring per-vCPU page tables, posing a problem when a guest uses the same page tables for multiple vCPU-s. Jan
On 14/05/2024 09:03, Jan Beulich wrote: > On 13.05.2024 17:27, Roger Pau Monné wrote: >> On Mon, May 13, 2024 at 11:10:59AM +0000, Elias El Yandouzi wrote: >>> @@ -317,6 +317,14 @@ extern unsigned long xen_phys_start; >>> #define ARG_XLAT_START(v) \ >>> (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT)) >>> >>> +/* pv_root_pt mapping area. The fourth per-domain-mapping sub-area */ >>> +#define PV_ROOT_PT_MAPPING_VIRT_START PERDOMAIN_VIRT_SLOT(3) >>> +#define PV_ROOT_PT_MAPPING_ENTRIES MAX_VIRT_CPUS >>> + >>> +/* The address of a particular VCPU's PV_ROOT_PT */ >>> +#define PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v) \ >>> + (PV_ROOT_PT_MAPPING_VIRT_START + ((v)->vcpu_id * PAGE_SIZE)) >> >> I know we are not there yet, but I wonder if we need to start having >> some non-shared per-cpu mapping area in the page-tables. Right now >> this is shared between all the vCPUs, as it's per-domain space >> (instead of per-vCPU). > > In turn requiring per-vCPU page tables, posing a problem when a guest > uses the same page tables for multiple vCPU-s. > > Jan > True. Having separate page tables per CPU is an unavoidable end goal for a hypervisor claiming to hold no secrets, however. Otherwise any CPU can still speculatively read the stacks of other CPUs and take well-timed glances over mappings set transiently by any other CPU. Cheers, Alejandro
© 2016 - 2024 Red Hat, Inc.