In preparation for the introduction of new vCPU operations allowing the
respective areas (one of the two is x86-specific) to be registered by
guest-physical address, add the necessary fork handling (with the
backing function yet to be filled in).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Extend comment.
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1641,6 +1641,68 @@ static void copy_vcpu_nonreg_state(struc
hvm_set_nonreg_state(cd_vcpu, &nrs);
}
+static int copy_guest_area(struct guest_area *cd_area,
+ const struct guest_area *d_area,
+ struct vcpu *cd_vcpu,
+ const struct domain *d)
+{
+ mfn_t d_mfn, cd_mfn;
+
+ if ( !d_area->pg )
+ return 0;
+
+ d_mfn = page_to_mfn(d_area->pg);
+
+ /* Allocate & map a page for the area if it hasn't been already. */
+ if ( !cd_area->pg )
+ {
+ gfn_t gfn = mfn_to_gfn(d, d_mfn);
+ struct p2m_domain *p2m = p2m_get_hostp2m(cd_vcpu->domain);
+ p2m_type_t p2mt;
+ p2m_access_t p2ma;
+ unsigned int offset;
+ int ret;
+
+ cd_mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL, NULL);
+ if ( mfn_eq(cd_mfn, INVALID_MFN) )
+ {
+ struct page_info *pg = alloc_domheap_page(cd_vcpu->domain, 0);
+
+ if ( !pg )
+ return -ENOMEM;
+
+ cd_mfn = page_to_mfn(pg);
+ set_gpfn_from_mfn(mfn_x(cd_mfn), gfn_x(gfn));
+
+ ret = p2m->set_entry(p2m, gfn, cd_mfn, PAGE_ORDER_4K, p2m_ram_rw,
+ p2m->default_access, -1);
+ if ( ret )
+ return ret;
+ }
+ else if ( p2mt != p2m_ram_rw )
+ return -EBUSY;
+
+ /*
+ * Map the area into the guest. For simplicity specify the entire range
+ * up to the end of the page: All the function uses it for is to check
+ * that the range doesn't cross page boundaries. Having the area mapped
+ * in the original domain implies that it fits there and therefore will
+ * also fit in the clone.
+ */
+ offset = PAGE_OFFSET(d_area->map);
+ ret = map_guest_area(cd_vcpu, gfn_to_gaddr(gfn) + offset,
+ PAGE_SIZE - offset, cd_area, NULL);
+ if ( ret )
+ return ret;
+ }
+ else
+ cd_mfn = page_to_mfn(cd_area->pg);
+
+ copy_domain_page(cd_mfn, d_mfn);
+
+ return 0;
+}
+
static int copy_vpmu(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
{
struct vpmu_struct *d_vpmu = vcpu_vpmu(d_vcpu);
@@ -1733,6 +1795,16 @@ static int copy_vcpu_settings(struct dom
copy_domain_page(new_vcpu_info_mfn, vcpu_info_mfn);
}
+ /* Same for the (physically registered) runstate and time info areas. */
+ ret = copy_guest_area(&cd_vcpu->runstate_guest_area,
+ &d_vcpu->runstate_guest_area, cd_vcpu, d);
+ if ( ret )
+ return ret;
+ ret = copy_guest_area(&cd_vcpu->arch.time_guest_area,
+ &d_vcpu->arch.time_guest_area, cd_vcpu, d);
+ if ( ret )
+ return ret;
+
ret = copy_vpmu(d_vcpu, cd_vcpu);
if ( ret )
return ret;
@@ -1974,7 +2046,10 @@ int mem_sharing_fork_reset(struct domain
state:
if ( reset_state )
+ {
rc = copy_settings(d, pd);
+ /* TBD: What to do here with -ERESTART? */
+ }
domain_unpause(d);
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1601,6 +1601,13 @@ void unmap_vcpu_info(struct vcpu *v)
put_page_and_type(mfn_to_page(mfn));
}
+int map_guest_area(struct vcpu *v, paddr_t gaddr, unsigned int size,
+ struct guest_area *area,
+ void (*populate)(void *dst, struct vcpu *v))
+{
+ return -EOPNOTSUPP;
+}
+
/*
* This is only intended to be used for domain cleanup (or more generally only
* with at least the respective vCPU, if it's not the current one, reliably
On Thu, Sep 28, 2023 at 09:16:20AM +0200, Jan Beulich wrote:
> In preparation for the introduction of new vCPU operations allowing the
> respective areas (one of the two is x86-specific) to be registered by
> guest-physical address, add the necessary fork handling (with the
> backing function yet to be filled in).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v3: Extend comment.
>
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1641,6 +1641,68 @@ static void copy_vcpu_nonreg_state(struc
> hvm_set_nonreg_state(cd_vcpu, &nrs);
> }
>
> +static int copy_guest_area(struct guest_area *cd_area,
> + const struct guest_area *d_area,
> + struct vcpu *cd_vcpu,
> + const struct domain *d)
> +{
> + mfn_t d_mfn, cd_mfn;
> +
> + if ( !d_area->pg )
> + return 0;
> +
> + d_mfn = page_to_mfn(d_area->pg);
> +
> + /* Allocate & map a page for the area if it hasn't been already. */
> + if ( !cd_area->pg )
> + {
> + gfn_t gfn = mfn_to_gfn(d, d_mfn);
> + struct p2m_domain *p2m = p2m_get_hostp2m(cd_vcpu->domain);
> + p2m_type_t p2mt;
> + p2m_access_t p2ma;
> + unsigned int offset;
> + int ret;
> +
> + cd_mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL, NULL);
> + if ( mfn_eq(cd_mfn, INVALID_MFN) )
> + {
> + struct page_info *pg = alloc_domheap_page(cd_vcpu->domain, 0);
> +
> + if ( !pg )
> + return -ENOMEM;
> +
> + cd_mfn = page_to_mfn(pg);
> + set_gpfn_from_mfn(mfn_x(cd_mfn), gfn_x(gfn));
> +
> + ret = p2m->set_entry(p2m, gfn, cd_mfn, PAGE_ORDER_4K, p2m_ram_rw,
> + p2m->default_access, -1);
> + if ( ret )
> + return ret;
> + }
I'm still unsure why map_guest_area() shouldn't be able to deal with a
forked child needing the page to be mapped. What happens when a
forked child executes the hypercall to map such areas against not yet
populated gfns?
Shouldn't map_guest_area() be capable of handling those calls and
populating on demand?
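Something along these lines is what I have in mind - only a rough
sketch, with resolve_area_gfn() being a made-up helper, and assuming
the usual get_page_from_gfn() semantics, where P2M_ALLOC backs PoD
entries on lookup:

    static int resolve_area_gfn(struct domain *d, gfn_t gfn,
                                struct page_info **pg)
    {
        p2m_type_t p2mt;
        /*
         * P2M_ALLOC demand-fills PoD entries; a truly empty entry
         * still yields NULL, which is the fork-child case at hand.
         */
        struct page_info *page = get_page_from_gfn(d, gfn_x(gfn), &p2mt,
                                                   P2M_ALLOC | P2M_UNSHARE);

        if ( !page )
            return -ENOENT;

        if ( p2mt != p2m_ram_rw )
        {
            put_page(page);
            return -EBUSY;
        }

        *pg = page;
        return 0;
    }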
> + else if ( p2mt != p2m_ram_rw )
> + return -EBUSY;
> +
> + /*
> + * Map the area into the guest. For simplicity specify the entire range
> + * up to the end of the page: All the function uses it for is to check
> + * that the range doesn't cross page boundaries. Having the area mapped
> + * in the original domain implies that it fits there and therefore will
> + * also fit in the clone.
> + */
> + offset = PAGE_OFFSET(d_area->map);
> + ret = map_guest_area(cd_vcpu, gfn_to_gaddr(gfn) + offset,
> + PAGE_SIZE - offset, cd_area, NULL);
> + if ( ret )
> + return ret;
> + }
> + else
> + cd_mfn = page_to_mfn(cd_area->pg);
> +
> + copy_domain_page(cd_mfn, d_mfn);
I think the page copy should be done only once, when the page is
populated on the child p2m. Otherwise areas smaller than a page size
> (like vcpu_time_info_t) that share the same page will get multiple
copies of the same data for no reason.
Thanks, Roger.
On 28.09.2023 11:51, Roger Pau Monné wrote:
> On Thu, Sep 28, 2023 at 09:16:20AM +0200, Jan Beulich wrote:
>> --- a/xen/arch/x86/mm/mem_sharing.c
>> +++ b/xen/arch/x86/mm/mem_sharing.c
>> @@ -1641,6 +1641,68 @@ static void copy_vcpu_nonreg_state(struc
>> hvm_set_nonreg_state(cd_vcpu, &nrs);
>> }
>>
>> +static int copy_guest_area(struct guest_area *cd_area,
>> + const struct guest_area *d_area,
>> + struct vcpu *cd_vcpu,
>> + const struct domain *d)
>> +{
>> + mfn_t d_mfn, cd_mfn;
>> +
>> + if ( !d_area->pg )
>> + return 0;
>> +
>> + d_mfn = page_to_mfn(d_area->pg);
>> +
>> + /* Allocate & map a page for the area if it hasn't been already. */
>> + if ( !cd_area->pg )
>> + {
>> + gfn_t gfn = mfn_to_gfn(d, d_mfn);
>> + struct p2m_domain *p2m = p2m_get_hostp2m(cd_vcpu->domain);
>> + p2m_type_t p2mt;
>> + p2m_access_t p2ma;
>> + unsigned int offset;
>> + int ret;
>> +
>> + cd_mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL, NULL);
>> + if ( mfn_eq(cd_mfn, INVALID_MFN) )
>> + {
>> + struct page_info *pg = alloc_domheap_page(cd_vcpu->domain, 0);
>> +
>> + if ( !pg )
>> + return -ENOMEM;
>> +
>> + cd_mfn = page_to_mfn(pg);
>> + set_gpfn_from_mfn(mfn_x(cd_mfn), gfn_x(gfn));
>> +
>> + ret = p2m->set_entry(p2m, gfn, cd_mfn, PAGE_ORDER_4K, p2m_ram_rw,
>> + p2m->default_access, -1);
>> + if ( ret )
>> + return ret;
>> + }
>
> I'm still unsure why map_guest_area() shouldn't be able to deal with a
> forked child needing the page to be mapped. What happens when a
> forked child executes the hypercall to map such areas against not yet
> populated gfns?
>
> Shouldn't map_guest_area() be capable of handling those calls and
> populating on demand?
Funny you should use this wording: P2M_ALLOC will deal with populating
PoD slots, yes, but aiui forks don't fill their p2m with all PoD slots,
but rather leave them empty. Yet again I need to leave it to Tamas to
confirm or prove me wrong.
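To illustrate the distinction (again only a sketch -
p2m_alloc_can_resolve() is a made-up name, and whether forks really
leave their slots entirely empty is exactly the part needing
confirmation):

    /* Would a (P2M_ALLOC-style) lookup of this slot yield a page? */
    static bool p2m_alloc_can_resolve(struct p2m_domain *p2m, gfn_t gfn)
    {
        p2m_type_t p2mt;
        p2m_access_t p2ma;
        mfn_t mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL, NULL);

        /* A PoD slot gets backed by the P2M_ALLOC lookup itself. */
        if ( p2mt == p2m_populate_on_demand )
            return true;

        /*
         * Anything else with INVALID_MFN is entirely empty and hence
         * beyond P2M_ALLOC's reach - the caller has to allocate, as
         * the hunk above does.
         */
        return !mfn_eq(mfn, INVALID_MFN);
    }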
>> + else if ( p2mt != p2m_ram_rw )
>> + return -EBUSY;
>> +
>> + /*
>> + * Map the area into the guest. For simplicity specify the entire range
>> + * up to the end of the page: All the function uses it for is to check
>> + * that the range doesn't cross page boundaries. Having the area mapped
>> + * in the original domain implies that it fits there and therefore will
>> + * also fit in the clone.
>> + */
>> + offset = PAGE_OFFSET(d_area->map);
>> + ret = map_guest_area(cd_vcpu, gfn_to_gaddr(gfn) + offset,
>> + PAGE_SIZE - offset, cd_area, NULL);
>> + if ( ret )
>> + return ret;
>> + }
>> + else
>> + cd_mfn = page_to_mfn(cd_area->pg);
>> +
>> + copy_domain_page(cd_mfn, d_mfn);
>
> I think the page copy should be done only once, when the page is
> populated on the child p2m. Otherwise areas smaller than a page size
> > (like vcpu_time_info_t) that share the same page will get multiple
> copies of the same data for no reason.
I think you're right, but this would then be another issue in the original
code that I merely didn't spot (and it's not just "copy for no reason",
we'd actually corrupt what was put there before). IOW the copying needs to
move ahead of map_guest_area() (or yet more precisely after the error
checking for p2m->set_entry()), and in the original code it would have
needed to live ahead of map_vcpu_info(). Once again I'd like Tamas to
confirm (or otherwise) before making that change, though.
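Concretely, the tail of the "freshly allocated" path above would then
become something like this (sketch only, with the unconditional
copy_domain_page() at the end of the function dropped):

    ret = p2m->set_entry(p2m, gfn, cd_mfn, PAGE_ORDER_4K, p2m_ram_rw,
                         p2m->default_access, -1);
    if ( ret )
        return ret;

    /*
     * Copy exactly once, right after (successfully) populating the
     * slot.  Copying after map_guest_area() would clobber what an
     * already mapped area sharing this page may have put there.
     */
    copy_domain_page(cd_mfn, d_mfn);
    }
    else if ( p2mt != p2m_ram_rw )
        return -EBUSY;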
Jan
On Thu, Sep 28, 2023 at 12:11:02PM +0200, Jan Beulich wrote:
> On 28.09.2023 11:51, Roger Pau Monné wrote:
> > On Thu, Sep 28, 2023 at 09:16:20AM +0200, Jan Beulich wrote:
> >> --- a/xen/arch/x86/mm/mem_sharing.c
> >> +++ b/xen/arch/x86/mm/mem_sharing.c
> >> @@ -1641,6 +1641,68 @@ static void copy_vcpu_nonreg_state(struc
> >> hvm_set_nonreg_state(cd_vcpu, &nrs);
> >> }
> >>
> >> +static int copy_guest_area(struct guest_area *cd_area,
> >> + const struct guest_area *d_area,
> >> + struct vcpu *cd_vcpu,
> >> + const struct domain *d)
> >> +{
> >> + mfn_t d_mfn, cd_mfn;
> >> +
> >> + if ( !d_area->pg )
> >> + return 0;
> >> +
> >> + d_mfn = page_to_mfn(d_area->pg);
> >> +
> >> + /* Allocate & map a page for the area if it hasn't been already. */
> >> + if ( !cd_area->pg )
> >> + {
> >> + gfn_t gfn = mfn_to_gfn(d, d_mfn);
> >> + struct p2m_domain *p2m = p2m_get_hostp2m(cd_vcpu->domain);
> >> + p2m_type_t p2mt;
> >> + p2m_access_t p2ma;
> >> + unsigned int offset;
> >> + int ret;
> >> +
> >> + cd_mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL, NULL);
> >> + if ( mfn_eq(cd_mfn, INVALID_MFN) )
> >> + {
> >> + struct page_info *pg = alloc_domheap_page(cd_vcpu->domain, 0);
> >> +
> >> + if ( !pg )
> >> + return -ENOMEM;
> >> +
> >> + cd_mfn = page_to_mfn(pg);
> >> + set_gpfn_from_mfn(mfn_x(cd_mfn), gfn_x(gfn));
> >> +
> >> + ret = p2m->set_entry(p2m, gfn, cd_mfn, PAGE_ORDER_4K, p2m_ram_rw,
> >> + p2m->default_access, -1);
> >> + if ( ret )
> >> + return ret;
> >> + }
> >
> > I'm still unsure why map_guest_area() shouldn't be able to deal with a
> > forked child needing the page to be mapped. What happens when a
> > forked child executes the hypercall to map such areas against not yet
> > populated gfns?
> >
> > Shouldn't map_guest_area() be capable of handling those calls and
> > populating on demand?
>
> Funny you should use this wording: P2M_ALLOC will deal with populating
> PoD slots, yes, but aiui forks don't fill their p2m with all PoD slots,
> but rather leave them empty. Yet again I need to leave it to Tamas to
> confirm or prove me wrong.
If populating the child p2m is only triggered by guest accesses, then a
lot of hypercalls are likely to not work correctly on children.
>
> >> + else if ( p2mt != p2m_ram_rw )
> >> + return -EBUSY;
> >> +
> >> + /*
> >> + * Map the area into the guest. For simplicity specify the entire range
> >> + * up to the end of the page: All the function uses it for is to check
> >> + * that the range doesn't cross page boundaries. Having the area mapped
> >> + * in the original domain implies that it fits there and therefore will
> >> + * also fit in the clone.
> >> + */
> >> + offset = PAGE_OFFSET(d_area->map);
> >> + ret = map_guest_area(cd_vcpu, gfn_to_gaddr(gfn) + offset,
> >> + PAGE_SIZE - offset, cd_area, NULL);
> >> + if ( ret )
> >> + return ret;
> >> + }
> >> + else
> >> + cd_mfn = page_to_mfn(cd_area->pg);
> >> +
> >> + copy_domain_page(cd_mfn, d_mfn);
> >
> > I think the page copy should be done only once, when the page is
> > populated on the child p2m. Otherwise areas smaller than a page size
> > > (like vcpu_time_info_t) that share the same page will get multiple
> > copies of the same data for no reason.
>
> I think you're right, but this would then be another issue in the original
> code that I merely didn't spot (and it's not just "copy for no reason",
> we'd actually corrupt what was put there before). IOW the copying needs to
> move ahead of map_guest_area() (or yet more precisely after the error
> checking for p2m->set_entry()), and in the original code it would have
> needed to live ahead of map_vcpu_info(). Once again I'd like Tamas to
> confirm (or otherwise) before making that change, though.
Yes, it's already an issue in the current code. I wonder whether
logic in the guest or Xen could malfunction because map_vcpu_info()
unconditionally sets evtchn_upcall_pending and injects an event
channel upcall, while the later call to copy_domain_page() might
clear evtchn_upcall_pending again even though the vector has already
been injected.
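In sequence form, the flow I'm worried about (mirroring the existing
copy_vcpu_settings() code, not a proposed change):

    /*
     * Sets evtchn_upcall_pending, sets all of evtchn_pending_sel and
     * injects the upcall vector into the child vCPU.
     */
    ret = map_vcpu_info(cd_vcpu, gfn_x(gfn), offset);
    if ( ret )
        return ret;

    /*
     * Wholesale overwrite with the parent's page contents: if the
     * parent had evtchn_upcall_pending clear, the flag set just above
     * is lost while the injected vector may still be pending delivery.
     */
    copy_domain_page(new_vcpu_info_mfn, vcpu_info_mfn);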
Thanks, Roger.