[v3] runstate/time area registration by (guest) physical address

[PATCH v3 8/8] common: convert vCPU info area registration

Posted by Jan Beulich 2 years, 9 months ago

Switch to using map_guest_area(). Noteworthy differences from
map_vcpu_info():
- remote vCPU-s are paused rather than checked for being down (which in
  principle can change right after the check),
- the domain lock is taken for a much smaller region,
- the error code for an attempt to re-register the area is now -EBUSY,
- we could in principle permit de-registration when no area was
  previously registered (which would permit "probing", if necessary for
  anything).

Note that this eliminates a bug in copy_vcpu_settings(): The function
did allocate a new page regardless of the GFN already having a mapping,
thus in particular breaking the case of two vCPU-s having their info
areas on the same page.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
RFC: I'm not really certain whether the preliminary check (ahead of
     calling map_guest_area()) is worthwhile to have.
---
v2: Re-base over changes earlier in the series. Properly enforce no re-
    registration. Avoid several casts by introducing local variables.

--- a/xen/arch/x86/include/asm/shared.h
+++ b/xen/arch/x86/include/asm/shared.h
@@ -26,17 +26,20 @@ static inline void arch_set_##field(stru
 #define GET_SET_VCPU(type, field)                               \
 static inline type arch_get_##field(const struct vcpu *v)       \
 {                                                               \
+    const vcpu_info_t *vi = v->vcpu_info_area.map;              \
+                                                                \
     return !has_32bit_shinfo(v->domain) ?                       \
-           v->vcpu_info->native.arch.field :                    \
-           v->vcpu_info->compat.arch.field;                     \
+           vi->native.arch.field : vi->compat.arch.field;       \
 }                                                               \
 static inline void arch_set_##field(struct vcpu *v,             \
                                     type val)                   \
 {                                                               \
+    vcpu_info_t *vi = v->vcpu_info_area.map;                    \
+                                                                \
     if ( !has_32bit_shinfo(v->domain) )                         \
-        v->vcpu_info->native.arch.field = val;                  \
+        vi->native.arch.field = val;                            \
     else                                                        \
-        v->vcpu_info->compat.arch.field = val;                  \
+        vi->compat.arch.field = val;                            \
 }
 
 #else
@@ -57,12 +60,16 @@ static inline void arch_set_##field(stru
 #define GET_SET_VCPU(type, field)                           \
 static inline type arch_get_##field(const struct vcpu *v)   \
 {                                                           \
-    return v->vcpu_info->arch.field;                        \
+    const vcpu_info_t *vi = v->vcpu_info_area.map;          \
+                                                            \
+    return vi->arch.field;                                  \
 }                                                           \
 static inline void arch_set_##field(struct vcpu *v,         \
                                     type val)               \
 {                                                           \
-    v->vcpu_info->arch.field = val;                         \
+    vcpu_info_t *vi = v->vcpu_info_area.map;                \
+                                                            \
+    vi->arch.field = val;                                   \
 }
 
 #endif
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -1749,53 +1749,24 @@ static int copy_vpmu(struct vcpu *d_vcpu
 static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
 {
     unsigned int i;
-    struct p2m_domain *p2m = p2m_get_hostp2m(cd);
     int ret = -EINVAL;
 
     for ( i = 0; i < cd->max_vcpus; i++ )
     {
         struct vcpu *d_vcpu = d->vcpu[i];
         struct vcpu *cd_vcpu = cd->vcpu[i];
-        mfn_t vcpu_info_mfn;
 
         if ( !d_vcpu || !cd_vcpu )
             continue;
 
-        /* Copy & map in the vcpu_info page if the guest uses one */
-        vcpu_info_mfn = d_vcpu->vcpu_info_mfn;
-        if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) )
-        {
-            mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn;
-
-            /* Allocate & map the page for it if it hasn't been already */
-            if ( mfn_eq(new_vcpu_info_mfn, INVALID_MFN) )
-            {
-                gfn_t gfn = mfn_to_gfn(d, vcpu_info_mfn);
-                unsigned long gfn_l = gfn_x(gfn);
-                struct page_info *page;
-
-                if ( !(page = alloc_domheap_page(cd, 0)) )
-                    return -ENOMEM;
-
-                new_vcpu_info_mfn = page_to_mfn(page);
-                set_gpfn_from_mfn(mfn_x(new_vcpu_info_mfn), gfn_l);
-
-                ret = p2m->set_entry(p2m, gfn, new_vcpu_info_mfn,
-                                     PAGE_ORDER_4K, p2m_ram_rw,
-                                     p2m->default_access, -1);
-                if ( ret )
-                    return ret;
-
-                ret = map_vcpu_info(cd_vcpu, gfn_l,
-                                    PAGE_OFFSET(d_vcpu->vcpu_info));
-                if ( ret )
-                    return ret;
-            }
-
-            copy_domain_page(new_vcpu_info_mfn, vcpu_info_mfn);
-        }
-
-        /* Same for the (physically registered) runstate and time info areas. */
+        /*
+         * Copy and map the vcpu_info page and the (physically registered)
+         * runstate and time info areas.
+         */
+        ret = copy_guest_area(&cd_vcpu->vcpu_info_area,
+                              &d_vcpu->vcpu_info_area, cd_vcpu, d);
+        if ( ret )
+            return ret;
         ret = copy_guest_area(&cd_vcpu->runstate_guest_area,
                               &d_vcpu->runstate_guest_area, cd_vcpu, d);
         if ( ret )
--- a/xen/arch/x86/pv/shim.c
+++ b/xen/arch/x86/pv/shim.c
@@ -383,7 +383,7 @@ int pv_shim_shutdown(uint8_t reason)
     for_each_vcpu ( d, v )
     {
         /* Unmap guest vcpu_info page and runstate/time areas. */
-        unmap_vcpu_info(v);
+        unmap_guest_area(v, &v->vcpu_info_area);
         unmap_guest_area(v, &v->runstate_guest_area);
         unmap_guest_area(v, &v->arch.time_guest_area);
 
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1547,7 +1547,7 @@ static void __update_vcpu_system_time(st
     struct vcpu_time_info *u = &vcpu_info(v, time), _u;
     const struct domain *d = v->domain;
 
-    if ( v->vcpu_info == NULL )
+    if ( !v->vcpu_info_area.map )
         return;
 
     collect_time_info(v, &_u);
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -53,7 +53,7 @@ void __dummy__(void)
 
     OFFSET(VCPU_processor, struct vcpu, processor);
     OFFSET(VCPU_domain, struct vcpu, domain);
-    OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
+    OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
     OFFSET(VCPU_trap_bounce, struct vcpu, arch.pv.trap_bounce);
     OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
     OFFSET(VCPU_event_addr, struct vcpu, arch.pv.event_callback_eip);
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -96,7 +96,7 @@ static void _show_registers(
     if ( context == CTXT_hypervisor )
         printk(" %pS", _p(regs->rip));
     printk("\nRFLAGS: %016lx   ", regs->rflags);
-    if ( (context == CTXT_pv_guest) && v && v->vcpu_info )
+    if ( (context == CTXT_pv_guest) && v && v->vcpu_info_area.map )
         printk("EM: %d   ", !!vcpu_info(v, evtchn_upcall_mask));
     printk("CONTEXT: %s", context_names[context]);
     if ( v && !is_idle_vcpu(v) )
--- a/xen/common/compat/domain.c
+++ b/xen/common/compat/domain.c
@@ -49,7 +49,7 @@ int compat_common_vcpu_op(int cmd, struc
     {
     case VCPUOP_initialise:
     {
-        if ( v->vcpu_info == &dummy_vcpu_info )
+        if ( v->vcpu_info_area.map == &dummy_vcpu_info )
             return -EINVAL;
 
 #ifdef CONFIG_HVM
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -127,10 +127,10 @@ static void vcpu_info_reset(struct vcpu
 {
     struct domain *d = v->domain;
 
-    v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
-                    ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
-                    : &dummy_vcpu_info);
-    v->vcpu_info_mfn = INVALID_MFN;
+    v->vcpu_info_area.map =
+        ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
+         ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
+         : &dummy_vcpu_info);
 }
 
 static void vmtrace_free_buffer(struct vcpu *v)
@@ -964,7 +964,7 @@ int domain_kill(struct domain *d)
             return -ERESTART;
         for_each_vcpu ( d, v )
         {
-            unmap_vcpu_info(v);
+            unmap_guest_area(v, &v->vcpu_info_area);
             unmap_guest_area(v, &v->runstate_guest_area);
         }
         d->is_dying = DOMDYING_dead;
@@ -1419,7 +1419,7 @@ int domain_soft_reset(struct domain *d,
     for_each_vcpu ( d, v )
     {
         set_xen_guest_handle(runstate_guest(v), NULL);
-        unmap_vcpu_info(v);
+        unmap_guest_area(v, &v->vcpu_info_area);
         unmap_guest_area(v, &v->runstate_guest_area);
     }
 
@@ -1467,111 +1467,6 @@ int vcpu_reset(struct vcpu *v)
     return rc;
 }
 
-/*
- * Map a guest page in and point the vcpu_info pointer at it.  This
- * makes sure that the vcpu_info is always pointing at a valid piece
- * of memory, and it sets a pending event to make sure that a pending
- * event doesn't get missed.
- */
-int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned int offset)
-{
-    struct domain *d = v->domain;
-    void *mapping;
-    vcpu_info_t *new_info;
-    struct page_info *page;
-    unsigned int align;
-
-    if ( offset > (PAGE_SIZE - sizeof(*new_info)) )
-        return -ENXIO;
-
-#ifdef CONFIG_COMPAT
-    BUILD_BUG_ON(sizeof(*new_info) != sizeof(new_info->compat));
-    if ( has_32bit_shinfo(d) )
-        align = alignof(new_info->compat);
-    else
-#endif
-        align = alignof(*new_info);
-    if ( offset & (align - 1) )
-        return -ENXIO;
-
-    if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
-        return -EINVAL;
-
-    /* Run this command on yourself or on other offline VCPUS. */
-    if ( (v != current) && !(v->pause_flags & VPF_down) )
-        return -EINVAL;
-
-    page = get_page_from_gfn(d, gfn, NULL, P2M_UNSHARE);
-    if ( !page )
-        return -EINVAL;
-
-    if ( !get_page_type(page, PGT_writable_page) )
-    {
-        put_page(page);
-        return -EINVAL;
-    }
-
-    mapping = __map_domain_page_global(page);
-    if ( mapping == NULL )
-    {
-        put_page_and_type(page);
-        return -ENOMEM;
-    }
-
-    new_info = (vcpu_info_t *)(mapping + offset);
-
-    if ( v->vcpu_info == &dummy_vcpu_info )
-    {
-        memset(new_info, 0, sizeof(*new_info));
-#ifdef XEN_HAVE_PV_UPCALL_MASK
-        __vcpu_info(v, new_info, evtchn_upcall_mask) = 1;
-#endif
-    }
-    else
-    {
-        memcpy(new_info, v->vcpu_info, sizeof(*new_info));
-    }
-
-    v->vcpu_info = new_info;
-    v->vcpu_info_mfn = page_to_mfn(page);
-
-    /* Set new vcpu_info pointer /before/ setting pending flags. */
-    smp_wmb();
-
-    /*
-     * Mark everything as being pending just to make sure nothing gets
-     * lost.  The domain will get a spurious event, but it can cope.
-     */
-#ifdef CONFIG_COMPAT
-    if ( !has_32bit_shinfo(d) )
-        write_atomic(&new_info->native.evtchn_pending_sel, ~0);
-    else
-#endif
-        write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
-    vcpu_mark_events_pending(v);
-
-    return 0;
-}
-
-/*
- * Unmap the vcpu info page if the guest decided to place it somewhere
- * else. This is used from domain_kill() and domain_soft_reset().
- */
-void unmap_vcpu_info(struct vcpu *v)
-{
-    mfn_t mfn = v->vcpu_info_mfn;
-
-    if ( mfn_eq(mfn, INVALID_MFN) )
-        return;
-
-    unmap_domain_page_global((void *)
-                             ((unsigned long)v->vcpu_info & PAGE_MASK));
-
-    vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */
-
-    put_page_and_type(mfn_to_page(mfn));
-}
-
 int map_guest_area(struct vcpu *v, paddr_t gaddr, unsigned int size,
                    struct guest_area *area,
                    void (*populate)(void *dst, struct vcpu *v))
@@ -1633,14 +1528,44 @@ int map_guest_area(struct vcpu *v, paddr
 
     domain_lock(d);
 
-    if ( map )
-        populate(map, v);
+    /* No re-registration of the vCPU info area. */
+    if ( area != &v->vcpu_info_area || !area->pg )
+    {
+        if ( map )
+            populate(map, v);
 
-    SWAP(area->pg, pg);
-    SWAP(area->map, map);
+        SWAP(area->pg, pg);
+        SWAP(area->map, map);
+    }
+    else
+        rc = -EBUSY;
 
     domain_unlock(d);
 
+    /* Set pending flags /after/ new vcpu_info pointer was set. */
+    if ( area == &v->vcpu_info_area && !rc )
+    {
+        /*
+         * Mark everything as being pending just to make sure nothing gets
+         * lost.  The domain will get a spurious event, but it can cope.
+         */
+#ifdef CONFIG_COMPAT
+        if ( !has_32bit_shinfo(d) )
+        {
+            vcpu_info_t *info = area->map;
+
+            /* For VCPUOP_register_vcpu_info handling in common_vcpu_op(). */
+            BUILD_BUG_ON(sizeof(*info) != sizeof(info->compat));
+            write_atomic(&info->native.evtchn_pending_sel, ~0);
+        }
+        else
+#endif
+            write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
+        vcpu_mark_events_pending(v);
+
+        force_update_vcpu_system_time(v);
+    }
+
     if ( v != current )
         vcpu_unpause(v);
 
@@ -1670,7 +1595,10 @@ void unmap_guest_area(struct vcpu *v, st
 
     domain_lock(d);
     map = area->map;
-    area->map = NULL;
+    if ( area == &v->vcpu_info_area )
+        vcpu_info_reset(v);
+    else
+        area->map = NULL;
     pg = area->pg;
     area->pg = NULL;
     domain_unlock(d);
@@ -1801,6 +1729,27 @@ bool update_runstate_area(struct vcpu *v
     return rc;
 }
 
+/*
+ * This makes sure that the vcpu_info is always pointing at a valid piece of
+ * memory, and it sets a pending event to make sure that a pending event
+ * doesn't get missed.
+ */
+static void cf_check
+vcpu_info_populate(void *map, struct vcpu *v)
+{
+    vcpu_info_t *info = map;
+
+    if ( v->vcpu_info_area.map == &dummy_vcpu_info )
+    {
+        memset(info, 0, sizeof(*info));
+#ifdef XEN_HAVE_PV_UPCALL_MASK
+        __vcpu_info(v, info, evtchn_upcall_mask) = 1;
+#endif
+    }
+    else
+        memcpy(info, v->vcpu_info_area.map, sizeof(*info));
+}
+
 static void cf_check
 runstate_area_populate(void *map, struct vcpu *v)
 {
@@ -1830,7 +1779,7 @@ long common_vcpu_op(int cmd, struct vcpu
     switch ( cmd )
     {
     case VCPUOP_initialise:
-        if ( v->vcpu_info == &dummy_vcpu_info )
+        if ( v->vcpu_info_area.map == &dummy_vcpu_info )
             return -EINVAL;
 
         rc = arch_initialise_vcpu(v, arg);
@@ -1961,16 +1910,29 @@ long common_vcpu_op(int cmd, struct vcpu
     case VCPUOP_register_vcpu_info:
     {
         struct vcpu_register_vcpu_info info;
+        paddr_t gaddr;
 
         rc = -EFAULT;
         if ( copy_from_guest(&info, arg, 1) )
             break;
 
-        domain_lock(d);
-        rc = map_vcpu_info(v, info.mfn, info.offset);
-        domain_unlock(d);
+        rc = -EINVAL;
+        gaddr = gfn_to_gaddr(_gfn(info.mfn)) + info.offset;
+        if ( !~gaddr ||
+             gfn_x(gaddr_to_gfn(gaddr)) != info.mfn )
+            break;
 
-        force_update_vcpu_system_time(v);
+        /* Preliminary check only; see map_guest_area(). */
+        rc = -EBUSY;
+        if ( v->vcpu_info_area.pg )
+            break;
+
+        /* See the BUILD_BUG_ON() in vcpu_info_populate(). */
+        rc = map_guest_area(v, gaddr, sizeof(vcpu_info_t),
+                            &v->vcpu_info_area, vcpu_info_populate);
+        if ( rc == -ERESTART )
+            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
+                                               cmd, vcpuid, arg);
 
         break;
     }
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -79,9 +79,6 @@ void cf_check free_pirq_struct(void *);
 int  arch_vcpu_create(struct vcpu *v);
 void arch_vcpu_destroy(struct vcpu *v);
 
-int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned int offset);
-void unmap_vcpu_info(struct vcpu *v);
-
 int map_guest_area(struct vcpu *v, paddr_t gaddr, unsigned int size,
                    struct guest_area *area,
                    void (*populate)(void *dst, struct vcpu *v));
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -175,7 +175,7 @@ struct vcpu
 
     int              processor;
 
-    vcpu_info_t     *vcpu_info;
+    struct guest_area vcpu_info_area;
 
     struct domain   *domain;
 
@@ -288,9 +288,6 @@ struct vcpu
 
     struct waitqueue_vcpu *waitqueue_vcpu;
 
-    /* Guest-specified relocation of vcpu_info. */
-    mfn_t            vcpu_info_mfn;
-
     struct evtchn_fifo_vcpu *evtchn_fifo;
 
     /* vPCI per-vCPU area, used to store data for long running operations. */
--- a/xen/include/xen/shared.h
+++ b/xen/include/xen/shared.h
@@ -44,6 +44,7 @@ typedef struct vcpu_info vcpu_info_t;
 extern vcpu_info_t dummy_vcpu_info;
 
 #define shared_info(d, field)      __shared_info(d, (d)->shared_info, field)
-#define vcpu_info(v, field)        __vcpu_info(v, (v)->vcpu_info, field)
+#define vcpu_info(v, field)        \
+        __vcpu_info(v, (vcpu_info_t *)(v)->vcpu_info_area.map, field)
 
 #endif /* __XEN_SHARED_H__ */

Re: [PATCH v3 8/8] common: convert vCPU info area registration

Posted by Roger Pau Monné 2 years, 4 months ago

On Wed, May 03, 2023 at 05:58:30PM +0200, Jan Beulich wrote:
> Switch to using map_guest_area(). Noteworthy differences from
> map_vcpu_info():
> - remote vCPU-s are paused rather than checked for being down (which in
>   principle can change right after the check),
> - the domain lock is taken for a much smaller region,
> - the error code for an attempt to re-register the area is now -EBUSY,
> - we could in principle permit de-registration when no area was
>   previously registered (which would permit "probing", if necessary for
>   anything).
> 
> Note that this eliminates a bug in copy_vcpu_settings(): The function
> did allocate a new page regardless of the GFN already having a mapping,
> thus in particular breaking the case of two vCPU-s having their info
> areas on the same page.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Some minor comments below:

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
> RFC: I'm not really certain whether the preliminary check (ahead of
>      calling map_guest_area()) is worthwhile to have.
> ---
> v2: Re-base over changes earlier in the series. Properly enforce no re-
>     registration. Avoid several casts by introducing local variables.
> 
> --- a/xen/arch/x86/include/asm/shared.h
> +++ b/xen/arch/x86/include/asm/shared.h
> @@ -26,17 +26,20 @@ static inline void arch_set_##field(stru
>  #define GET_SET_VCPU(type, field)                               \
>  static inline type arch_get_##field(const struct vcpu *v)       \
>  {                                                               \
> +    const vcpu_info_t *vi = v->vcpu_info_area.map;              \
> +                                                                \
>      return !has_32bit_shinfo(v->domain) ?                       \
> -           v->vcpu_info->native.arch.field :                    \
> -           v->vcpu_info->compat.arch.field;                     \
> +           vi->native.arch.field : vi->compat.arch.field;       \
>  }                                                               \
>  static inline void arch_set_##field(struct vcpu *v,             \
>                                      type val)                   \
>  {                                                               \
> +    vcpu_info_t *vi = v->vcpu_info_area.map;                    \
> +                                                                \
>      if ( !has_32bit_shinfo(v->domain) )                         \
> -        v->vcpu_info->native.arch.field = val;                  \
> +        vi->native.arch.field = val;                            \
>      else                                                        \
> -        v->vcpu_info->compat.arch.field = val;                  \
> +        vi->compat.arch.field = val;                            \
>  }
>  
>  #else
> @@ -57,12 +60,16 @@ static inline void arch_set_##field(stru
>  #define GET_SET_VCPU(type, field)                           \
>  static inline type arch_get_##field(const struct vcpu *v)   \
>  {                                                           \
> -    return v->vcpu_info->arch.field;                        \
> +    const vcpu_info_t *vi = v->vcpu_info_area.map;          \
> +                                                            \
> +    return vi->arch.field;                                  \
>  }                                                           \
>  static inline void arch_set_##field(struct vcpu *v,         \
>                                      type val)               \
>  {                                                           \
> -    v->vcpu_info->arch.field = val;                         \
> +    vcpu_info_t *vi = v->vcpu_info_area.map;                \
> +                                                            \
> +    vi->arch.field = val;                                   \
>  }
>  
>  #endif
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1749,53 +1749,24 @@ static int copy_vpmu(struct vcpu *d_vcpu
>  static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
>  {
>      unsigned int i;
> -    struct p2m_domain *p2m = p2m_get_hostp2m(cd);
>      int ret = -EINVAL;
>  
>      for ( i = 0; i < cd->max_vcpus; i++ )
>      {
>          struct vcpu *d_vcpu = d->vcpu[i];
>          struct vcpu *cd_vcpu = cd->vcpu[i];
> -        mfn_t vcpu_info_mfn;
>  
>          if ( !d_vcpu || !cd_vcpu )
>              continue;
>  
> -        /* Copy & map in the vcpu_info page if the guest uses one */
> -        vcpu_info_mfn = d_vcpu->vcpu_info_mfn;
> -        if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) )
> -        {
> -            mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn;
> -
> -            /* Allocate & map the page for it if it hasn't been already */
> -            if ( mfn_eq(new_vcpu_info_mfn, INVALID_MFN) )
> -            {
> -                gfn_t gfn = mfn_to_gfn(d, vcpu_info_mfn);
> -                unsigned long gfn_l = gfn_x(gfn);
> -                struct page_info *page;
> -
> -                if ( !(page = alloc_domheap_page(cd, 0)) )
> -                    return -ENOMEM;
> -
> -                new_vcpu_info_mfn = page_to_mfn(page);
> -                set_gpfn_from_mfn(mfn_x(new_vcpu_info_mfn), gfn_l);
> -
> -                ret = p2m->set_entry(p2m, gfn, new_vcpu_info_mfn,
> -                                     PAGE_ORDER_4K, p2m_ram_rw,
> -                                     p2m->default_access, -1);
> -                if ( ret )
> -                    return ret;
> -
> -                ret = map_vcpu_info(cd_vcpu, gfn_l,
> -                                    PAGE_OFFSET(d_vcpu->vcpu_info));
> -                if ( ret )
> -                    return ret;
> -            }
> -
> -            copy_domain_page(new_vcpu_info_mfn, vcpu_info_mfn);
> -        }
> -
> -        /* Same for the (physically registered) runstate and time info areas. */
> +        /*
> +         * Copy and map the vcpu_info page and the (physically registered)
> +         * runstate and time info areas.
> +         */
> +        ret = copy_guest_area(&cd_vcpu->vcpu_info_area,
> +                              &d_vcpu->vcpu_info_area, cd_vcpu, d);
> +        if ( ret )
> +            return ret;
>          ret = copy_guest_area(&cd_vcpu->runstate_guest_area,
>                                &d_vcpu->runstate_guest_area, cd_vcpu, d);
>          if ( ret )
> --- a/xen/arch/x86/pv/shim.c
> +++ b/xen/arch/x86/pv/shim.c
> @@ -383,7 +383,7 @@ int pv_shim_shutdown(uint8_t reason)
>      for_each_vcpu ( d, v )
>      {
>          /* Unmap guest vcpu_info page and runstate/time areas. */
> -        unmap_vcpu_info(v);
> +        unmap_guest_area(v, &v->vcpu_info_area);
>          unmap_guest_area(v, &v->runstate_guest_area);
>          unmap_guest_area(v, &v->arch.time_guest_area);
>  
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -1547,7 +1547,7 @@ static void __update_vcpu_system_time(st
>      struct vcpu_time_info *u = &vcpu_info(v, time), _u;
>      const struct domain *d = v->domain;
>  
> -    if ( v->vcpu_info == NULL )
> +    if ( !v->vcpu_info_area.map )
>          return;
>  
>      collect_time_info(v, &_u);
> --- a/xen/arch/x86/x86_64/asm-offsets.c
> +++ b/xen/arch/x86/x86_64/asm-offsets.c
> @@ -53,7 +53,7 @@ void __dummy__(void)
>  
>      OFFSET(VCPU_processor, struct vcpu, processor);
>      OFFSET(VCPU_domain, struct vcpu, domain);
> -    OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
> +    OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
>      OFFSET(VCPU_trap_bounce, struct vcpu, arch.pv.trap_bounce);
>      OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
>      OFFSET(VCPU_event_addr, struct vcpu, arch.pv.event_callback_eip);
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
> @@ -96,7 +96,7 @@ static void _show_registers(
>      if ( context == CTXT_hypervisor )
>          printk(" %pS", _p(regs->rip));
>      printk("\nRFLAGS: %016lx   ", regs->rflags);
> -    if ( (context == CTXT_pv_guest) && v && v->vcpu_info )
> +    if ( (context == CTXT_pv_guest) && v && v->vcpu_info_area.map )
>          printk("EM: %d   ", !!vcpu_info(v, evtchn_upcall_mask));
>      printk("CONTEXT: %s", context_names[context]);
>      if ( v && !is_idle_vcpu(v) )
> --- a/xen/common/compat/domain.c
> +++ b/xen/common/compat/domain.c
> @@ -49,7 +49,7 @@ int compat_common_vcpu_op(int cmd, struc
>      {
>      case VCPUOP_initialise:
>      {
> -        if ( v->vcpu_info == &dummy_vcpu_info )
> +        if ( v->vcpu_info_area.map == &dummy_vcpu_info )
>              return -EINVAL;
>  
>  #ifdef CONFIG_HVM
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -127,10 +127,10 @@ static void vcpu_info_reset(struct vcpu
>  {
>      struct domain *d = v->domain;

d could likely be made const?

>  
> -    v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
> -                    ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
> -                    : &dummy_vcpu_info);
> -    v->vcpu_info_mfn = INVALID_MFN;
> +    v->vcpu_info_area.map =
> +        ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
> +         ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
> +         : &dummy_vcpu_info);
>  }
>  
>  static void vmtrace_free_buffer(struct vcpu *v)
> @@ -964,7 +964,7 @@ int domain_kill(struct domain *d)
>              return -ERESTART;
>          for_each_vcpu ( d, v )
>          {
> -            unmap_vcpu_info(v);
> +            unmap_guest_area(v, &v->vcpu_info_area);
>              unmap_guest_area(v, &v->runstate_guest_area);
>          }
>          d->is_dying = DOMDYING_dead;
> @@ -1419,7 +1419,7 @@ int domain_soft_reset(struct domain *d,
>      for_each_vcpu ( d, v )
>      {
>          set_xen_guest_handle(runstate_guest(v), NULL);
> -        unmap_vcpu_info(v);
> +        unmap_guest_area(v, &v->vcpu_info_area);
>          unmap_guest_area(v, &v->runstate_guest_area);
>      }
>  
> @@ -1467,111 +1467,6 @@ int vcpu_reset(struct vcpu *v)
>      return rc;
>  }
>  
> -/*
> - * Map a guest page in and point the vcpu_info pointer at it.  This
> - * makes sure that the vcpu_info is always pointing at a valid piece
> - * of memory, and it sets a pending event to make sure that a pending
> - * event doesn't get missed.
> - */
> -int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned int offset)
> -{
> -    struct domain *d = v->domain;
> -    void *mapping;
> -    vcpu_info_t *new_info;
> -    struct page_info *page;
> -    unsigned int align;
> -
> -    if ( offset > (PAGE_SIZE - sizeof(*new_info)) )
> -        return -ENXIO;
> -
> -#ifdef CONFIG_COMPAT
> -    BUILD_BUG_ON(sizeof(*new_info) != sizeof(new_info->compat));
> -    if ( has_32bit_shinfo(d) )
> -        align = alignof(new_info->compat);
> -    else
> -#endif
> -        align = alignof(*new_info);
> -    if ( offset & (align - 1) )
> -        return -ENXIO;
> -
> -    if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
> -        return -EINVAL;
> -
> -    /* Run this command on yourself or on other offline VCPUS. */
> -    if ( (v != current) && !(v->pause_flags & VPF_down) )
> -        return -EINVAL;
> -
> -    page = get_page_from_gfn(d, gfn, NULL, P2M_UNSHARE);
> -    if ( !page )
> -        return -EINVAL;
> -
> -    if ( !get_page_type(page, PGT_writable_page) )
> -    {
> -        put_page(page);
> -        return -EINVAL;
> -    }
> -
> -    mapping = __map_domain_page_global(page);
> -    if ( mapping == NULL )
> -    {
> -        put_page_and_type(page);
> -        return -ENOMEM;
> -    }
> -
> -    new_info = (vcpu_info_t *)(mapping + offset);
> -
> -    if ( v->vcpu_info == &dummy_vcpu_info )
> -    {
> -        memset(new_info, 0, sizeof(*new_info));
> -#ifdef XEN_HAVE_PV_UPCALL_MASK
> -        __vcpu_info(v, new_info, evtchn_upcall_mask) = 1;
> -#endif
> -    }
> -    else
> -    {
> -        memcpy(new_info, v->vcpu_info, sizeof(*new_info));
> -    }
> -
> -    v->vcpu_info = new_info;
> -    v->vcpu_info_mfn = page_to_mfn(page);
> -
> -    /* Set new vcpu_info pointer /before/ setting pending flags. */
> -    smp_wmb();
> -
> -    /*
> -     * Mark everything as being pending just to make sure nothing gets
> -     * lost.  The domain will get a spurious event, but it can cope.
> -     */
> -#ifdef CONFIG_COMPAT
> -    if ( !has_32bit_shinfo(d) )
> -        write_atomic(&new_info->native.evtchn_pending_sel, ~0);
> -    else
> -#endif
> -        write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
> -    vcpu_mark_events_pending(v);
> -
> -    return 0;
> -}
> -
> -/*
> - * Unmap the vcpu info page if the guest decided to place it somewhere
> - * else. This is used from domain_kill() and domain_soft_reset().
> - */
> -void unmap_vcpu_info(struct vcpu *v)
> -{
> -    mfn_t mfn = v->vcpu_info_mfn;
> -
> -    if ( mfn_eq(mfn, INVALID_MFN) )
> -        return;
> -
> -    unmap_domain_page_global((void *)
> -                             ((unsigned long)v->vcpu_info & PAGE_MASK));
> -
> -    vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */
> -
> -    put_page_and_type(mfn_to_page(mfn));
> -}
> -
>  int map_guest_area(struct vcpu *v, paddr_t gaddr, unsigned int size,
>                     struct guest_area *area,
>                     void (*populate)(void *dst, struct vcpu *v))
> @@ -1633,14 +1528,44 @@ int map_guest_area(struct vcpu *v, paddr
>  
>      domain_lock(d);
>  
> -    if ( map )
> -        populate(map, v);
> +    /* No re-registration of the vCPU info area. */
> +    if ( area != &v->vcpu_info_area || !area->pg )

It would be nice if this check could be done earlier, as to avoid
having to fetch and map the page just to discard it.  That would
however require taking the domain lock earlier.

> +    {
> +        if ( map )
> +            populate(map, v);
>  
> -    SWAP(area->pg, pg);
> -    SWAP(area->map, map);
> +        SWAP(area->pg, pg);
> +        SWAP(area->map, map);
> +    }
> +    else
> +        rc = -EBUSY;
>  
>      domain_unlock(d);
>  
> +    /* Set pending flags /after/ new vcpu_info pointer was set. */
> +    if ( area == &v->vcpu_info_area && !rc )
> +    {
> +        /*
> +         * Mark everything as being pending just to make sure nothing gets
> +         * lost.  The domain will get a spurious event, but it can cope.
> +         */
> +#ifdef CONFIG_COMPAT
> +        if ( !has_32bit_shinfo(d) )
> +        {
> +            vcpu_info_t *info = area->map;
> +
> +            /* For VCPUOP_register_vcpu_info handling in common_vcpu_op(). */
> +            BUILD_BUG_ON(sizeof(*info) != sizeof(info->compat));
> +            write_atomic(&info->native.evtchn_pending_sel, ~0);
> +        }
> +        else
> +#endif
> +            write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);

Can't the setting of evtchn_pending_sel be done in
vcpu_info_populate()?

> +        vcpu_mark_events_pending(v);
> +
> +        force_update_vcpu_system_time(v);
> +    }
> +
>      if ( v != current )
>          vcpu_unpause(v);
>  
> @@ -1670,7 +1595,10 @@ void unmap_guest_area(struct vcpu *v, st
>  
>      domain_lock(d);
>      map = area->map;
> -    area->map = NULL;
> +    if ( area == &v->vcpu_info_area )
> +        vcpu_info_reset(v);
> +    else
> +        area->map = NULL;
>      pg = area->pg;
>      area->pg = NULL;
>      domain_unlock(d);
> @@ -1801,6 +1729,27 @@ bool update_runstate_area(struct vcpu *v
>      return rc;
>  }
>  
> +/*
> + * This makes sure that the vcpu_info is always pointing at a valid piece of
> + * memory, and it sets a pending event to make sure that a pending event
> + * doesn't get missed.
> + */
> +static void cf_check
> +vcpu_info_populate(void *map, struct vcpu *v)
> +{
> +    vcpu_info_t *info = map;
> +
> +    if ( v->vcpu_info_area.map == &dummy_vcpu_info )
> +    {
> +        memset(info, 0, sizeof(*info));
> +#ifdef XEN_HAVE_PV_UPCALL_MASK
> +        __vcpu_info(v, info, evtchn_upcall_mask) = 1;
> +#endif

I'm not sure about the point of those guards, this will always be 1,
as we always build the hypervisor with the headers in xen/public?

Is it to make backports easier?

Thanks, Roger.

Re: [PATCH v3 8/8] common: convert vCPU info area registration

Posted by Jan Beulich 2 years, 4 months ago

On 28.09.2023 10:24, Roger Pau Monné wrote:
> On Wed, May 03, 2023 at 05:58:30PM +0200, Jan Beulich wrote:
>> Switch to using map_guest_area(). Noteworthy differences from
>> map_vcpu_info():
>> - remote vCPU-s are paused rather than checked for being down (which in
>>   principle can change right after the check),
>> - the domain lock is taken for a much smaller region,
>> - the error code for an attempt to re-register the area is now -EBUSY,
>> - we could in principle permit de-registration when no area was
>>   previously registered (which would permit "probing", if necessary for
>>   anything).
>>
>> Note that this eliminates a bug in copy_vcpu_settings(): The function
>> did allocate a new page regardless of the GFN already having a mapping,
>> thus in particular breaking the case of two vCPU-s having their info
>> areas on the same page.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Some minor comments below:
> 
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks.

>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -127,10 +127,10 @@ static void vcpu_info_reset(struct vcpu
>>  {
>>      struct domain *d = v->domain;
> 
> d could likely be made const?

Probably, but this is just context here.

>> @@ -1633,14 +1528,44 @@ int map_guest_area(struct vcpu *v, paddr
>>  
>>      domain_lock(d);
>>  
>> -    if ( map )
>> -        populate(map, v);
>> +    /* No re-registration of the vCPU info area. */
>> +    if ( area != &v->vcpu_info_area || !area->pg )
> 
> It would be nice if this check could be done earlier, as to avoid
> having to fetch and map the page just to discard it.  That would
> however require taking the domain lock earlier.

In order to kind of balance the conflicting goals, there is an unlocked
early check in the caller. See also the related RFC remark; I might
interpret your remark as "yes, keep the early check".

>> +    {
>> +        if ( map )
>> +            populate(map, v);
>>  
>> -    SWAP(area->pg, pg);
>> -    SWAP(area->map, map);
>> +        SWAP(area->pg, pg);
>> +        SWAP(area->map, map);
>> +    }
>> +    else
>> +        rc = -EBUSY;
>>  
>>      domain_unlock(d);
>>  
>> +    /* Set pending flags /after/ new vcpu_info pointer was set. */
>> +    if ( area == &v->vcpu_info_area && !rc )
>> +    {
>> +        /*
>> +         * Mark everything as being pending just to make sure nothing gets
>> +         * lost.  The domain will get a spurious event, but it can cope.
>> +         */
>> +#ifdef CONFIG_COMPAT
>> +        if ( !has_32bit_shinfo(d) )
>> +        {
>> +            vcpu_info_t *info = area->map;
>> +
>> +            /* For VCPUOP_register_vcpu_info handling in common_vcpu_op(). */
>> +            BUILD_BUG_ON(sizeof(*info) != sizeof(info->compat));
>> +            write_atomic(&info->native.evtchn_pending_sel, ~0);
>> +        }
>> +        else
>> +#endif
>> +            write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
> 
> Can't the setting of evtchn_pending_sel be done in
> vcpu_info_populate()?

No, see the comment ahead of the outer if(). populate() needs calling
ahead of updating the pointer.

>> @@ -1801,6 +1729,27 @@ bool update_runstate_area(struct vcpu *v
>>      return rc;
>>  }
>>  
>> +/*
>> + * This makes sure that the vcpu_info is always pointing at a valid piece of
>> + * memory, and it sets a pending event to make sure that a pending event
>> + * doesn't get missed.
>> + */
>> +static void cf_check
>> +vcpu_info_populate(void *map, struct vcpu *v)
>> +{
>> +    vcpu_info_t *info = map;
>> +
>> +    if ( v->vcpu_info_area.map == &dummy_vcpu_info )
>> +    {
>> +        memset(info, 0, sizeof(*info));
>> +#ifdef XEN_HAVE_PV_UPCALL_MASK
>> +        __vcpu_info(v, info, evtchn_upcall_mask) = 1;
>> +#endif
> 
> I'm not sure about the point of those guards, this will always be 1,
> as we always build the hypervisor with the headers in xen/public?

That's the case on x86, but this is common code, and hence the build would
break on other architectures if the guard wasn't there.

> Is it to make backports easier?

I'm afraid I don't see how backporting considerations could play into here.

Jan

Re: [PATCH v3 8/8] common: convert vCPU info area registration

Posted by Roger Pau Monné 2 years, 4 months ago

On Thu, Sep 28, 2023 at 11:53:36AM +0200, Jan Beulich wrote:
> On 28.09.2023 10:24, Roger Pau Monné wrote:
> > On Wed, May 03, 2023 at 05:58:30PM +0200, Jan Beulich wrote:
> >> Switch to using map_guest_area(). Noteworthy differences from
> >> map_vcpu_info():
> >> - remote vCPU-s are paused rather than checked for being down (which in
> >>   principle can change right after the check),
> >> - the domain lock is taken for a much smaller region,
> >> - the error code for an attempt to re-register the area is now -EBUSY,
> >> - we could in principle permit de-registration when no area was
> >>   previously registered (which would permit "probing", if necessary for
> >>   anything).
> >>
> >> Note that this eliminates a bug in copy_vcpu_settings(): The function
> >> did allocate a new page regardless of the GFN already having a mapping,
> >> thus in particular breaking the case of two vCPU-s having their info
> >> areas on the same page.
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> > 
> > Some minor comments below:
> > 
> > Acked-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> Thanks.
> 
> >> --- a/xen/common/domain.c
> >> +++ b/xen/common/domain.c
> >> @@ -127,10 +127,10 @@ static void vcpu_info_reset(struct vcpu
> >>  {
> >>      struct domain *d = v->domain;
> > 
> > d could likely be made const?
> 
> Probably, but this is just context here.
> 
> >> @@ -1633,14 +1528,44 @@ int map_guest_area(struct vcpu *v, paddr
> >>  
> >>      domain_lock(d);
> >>  
> >> -    if ( map )
> >> -        populate(map, v);
> >> +    /* No re-registration of the vCPU info area. */
> >> +    if ( area != &v->vcpu_info_area || !area->pg )
> > 
> > It would be nice if this check could be done earlier, as to avoid
> > having to fetch and map the page just to discard it.  That would
> > however require taking the domain lock earlier.
> 
> In order to kind of balance the conflicting goals, there is an unlocked
> early check in the caller. See also the related RFC remark; I might
> interpret your remark as "yes, keep the early check".

Oh, yes, do keep the check.

> >> +    {
> >> +        if ( map )
> >> +            populate(map, v);
> >>  
> >> -    SWAP(area->pg, pg);
> >> -    SWAP(area->map, map);
> >> +        SWAP(area->pg, pg);
> >> +        SWAP(area->map, map);
> >> +    }
> >> +    else
> >> +        rc = -EBUSY;
> >>  
> >>      domain_unlock(d);
> >>  
> >> +    /* Set pending flags /after/ new vcpu_info pointer was set. */
> >> +    if ( area == &v->vcpu_info_area && !rc )
> >> +    {
> >> +        /*
> >> +         * Mark everything as being pending just to make sure nothing gets
> >> +         * lost.  The domain will get a spurious event, but it can cope.
> >> +         */
> >> +#ifdef CONFIG_COMPAT
> >> +        if ( !has_32bit_shinfo(d) )
> >> +        {
> >> +            vcpu_info_t *info = area->map;
> >> +
> >> +            /* For VCPUOP_register_vcpu_info handling in common_vcpu_op(). */
> >> +            BUILD_BUG_ON(sizeof(*info) != sizeof(info->compat));
> >> +            write_atomic(&info->native.evtchn_pending_sel, ~0);
> >> +        }
> >> +        else
> >> +#endif
> >> +            write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
> > 
> > Can't the setting of evtchn_pending_sel be done in
> > vcpu_info_populate()?
> 
> No, see the comment ahead of the outer if(). populate() needs calling
> ahead of updating the pointer.

I'm afraid I don't obviously see why evtchn_pending_sel can't be set
before v->vcpu_info is updated.  It will end up being ~0 anyway,
regardless of the order of operations, and we do force an event
channel injection.  There's something I'm clearly missing.

vcpu_mark_events_pending() and force_update_vcpu_system_time()
obviously cannot be moved to the populate hook.

> >> @@ -1801,6 +1729,27 @@ bool update_runstate_area(struct vcpu *v
> >>      return rc;
> >>  }
> >>  
> >> +/*
> >> + * This makes sure that the vcpu_info is always pointing at a valid piece of
> >> + * memory, and it sets a pending event to make sure that a pending event
> >> + * doesn't get missed.
> >> + */
> >> +static void cf_check
> >> +vcpu_info_populate(void *map, struct vcpu *v)
> >> +{
> >> +    vcpu_info_t *info = map;
> >> +
> >> +    if ( v->vcpu_info_area.map == &dummy_vcpu_info )
> >> +    {
> >> +        memset(info, 0, sizeof(*info));
> >> +#ifdef XEN_HAVE_PV_UPCALL_MASK
> >> +        __vcpu_info(v, info, evtchn_upcall_mask) = 1;
> >> +#endif
> > 
> > I'm not sure about the point of those guards, this will always be 1,
> > as we always build the hypervisor with the headers in xen/public?
> 
> That's the case on x86, but this is common code, and hence the build would
> break on other architectures if the guard wasn't there.

Ah, I see, sorry for the noise.

Thanks, Roger.

Re: [PATCH v3 8/8] common: convert vCPU info area registration

Posted by Jan Beulich 2 years, 4 months ago

On 28.09.2023 12:15, Roger Pau Monné wrote:
> On Thu, Sep 28, 2023 at 11:53:36AM +0200, Jan Beulich wrote:
>> On 28.09.2023 10:24, Roger Pau Monné wrote:
>>> On Wed, May 03, 2023 at 05:58:30PM +0200, Jan Beulich wrote:
>>>> @@ -1633,14 +1528,44 @@ int map_guest_area(struct vcpu *v, paddr
>>>>  
>>>>      domain_lock(d);
>>>>  
>>>> -    if ( map )
>>>> -        populate(map, v);
>>>> +    /* No re-registration of the vCPU info area. */
>>>> +    if ( area != &v->vcpu_info_area || !area->pg )
>>>
>>> It would be nice if this check could be done earlier, as to avoid
>>> having to fetch and map the page just to discard it.  That would
>>> however require taking the domain lock earlier.
>>
>> In order to kind of balance the conflicting goals, there is an unlocked
>> early check in the caller. See also the related RFC remark; I might
>> interpret your remark as "yes, keep the early check".
> 
> Oh, yes, do keep the check.
> 
>>>> +    {
>>>> +        if ( map )
>>>> +            populate(map, v);
>>>>  
>>>> -    SWAP(area->pg, pg);
>>>> -    SWAP(area->map, map);
>>>> +        SWAP(area->pg, pg);
>>>> +        SWAP(area->map, map);
>>>> +    }
>>>> +    else
>>>> +        rc = -EBUSY;
>>>>  
>>>>      domain_unlock(d);
>>>>  
>>>> +    /* Set pending flags /after/ new vcpu_info pointer was set. */
>>>> +    if ( area == &v->vcpu_info_area && !rc )
>>>> +    {
>>>> +        /*
>>>> +         * Mark everything as being pending just to make sure nothing gets
>>>> +         * lost.  The domain will get a spurious event, but it can cope.
>>>> +         */
>>>> +#ifdef CONFIG_COMPAT
>>>> +        if ( !has_32bit_shinfo(d) )
>>>> +        {
>>>> +            vcpu_info_t *info = area->map;
>>>> +
>>>> +            /* For VCPUOP_register_vcpu_info handling in common_vcpu_op(). */
>>>> +            BUILD_BUG_ON(sizeof(*info) != sizeof(info->compat));
>>>> +            write_atomic(&info->native.evtchn_pending_sel, ~0);
>>>> +        }
>>>> +        else
>>>> +#endif
>>>> +            write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
>>>
>>> Can't the setting of evtchn_pending_sel be done in
>>> vcpu_info_populate()?
>>
>> No, see the comment ahead of the outer if(). populate() needs calling
>> ahead of updating the pointer.
> 
> I'm afraid I don't obviously see why evtchn_pending_sel can't be set
> before v->vcpu_info is updated.  It will end up being ~0 anyway,
> regardless of the order of operations, and we do force an event
> channel injection.  There's something I'm clearly missing.

Considering the target vCPU is paused (or is current), doing so may be
possible (albeit I fear I'm overlooking something). But re-ordering of
operations shouldn't be done as a side effect of this change; if the
original ordering constraints were too strict, then imo relaxing should
either be a separate earlier change, or a separate follow-on one.

Jan

Re: [PATCH v3 8/8] common: convert vCPU info area registration

Posted by Roger Pau Monné 2 years, 4 months ago

On Thu, Sep 28, 2023 at 12:35:23PM +0200, Jan Beulich wrote:
> On 28.09.2023 12:15, Roger Pau Monné wrote:
> > On Thu, Sep 28, 2023 at 11:53:36AM +0200, Jan Beulich wrote:
> >> On 28.09.2023 10:24, Roger Pau Monné wrote:
> >>> On Wed, May 03, 2023 at 05:58:30PM +0200, Jan Beulich wrote:
> >>>> @@ -1633,14 +1528,44 @@ int map_guest_area(struct vcpu *v, paddr
> >>>> +    {
> >>>> +        if ( map )
> >>>> +            populate(map, v);
> >>>>  
> >>>> -    SWAP(area->pg, pg);
> >>>> -    SWAP(area->map, map);
> >>>> +        SWAP(area->pg, pg);
> >>>> +        SWAP(area->map, map);
> >>>> +    }
> >>>> +    else
> >>>> +        rc = -EBUSY;
> >>>>  
> >>>>      domain_unlock(d);
> >>>>  
> >>>> +    /* Set pending flags /after/ new vcpu_info pointer was set. */
> >>>> +    if ( area == &v->vcpu_info_area && !rc )
> >>>> +    {
> >>>> +        /*
> >>>> +         * Mark everything as being pending just to make sure nothing gets
> >>>> +         * lost.  The domain will get a spurious event, but it can cope.
> >>>> +         */
> >>>> +#ifdef CONFIG_COMPAT
> >>>> +        if ( !has_32bit_shinfo(d) )
> >>>> +        {
> >>>> +            vcpu_info_t *info = area->map;
> >>>> +
> >>>> +            /* For VCPUOP_register_vcpu_info handling in common_vcpu_op(). */
> >>>> +            BUILD_BUG_ON(sizeof(*info) != sizeof(info->compat));
> >>>> +            write_atomic(&info->native.evtchn_pending_sel, ~0);
> >>>> +        }
> >>>> +        else
> >>>> +#endif
> >>>> +            write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
> >>>
> >>> Can't the setting of evtchn_pending_sel be done in
> >>> vcpu_info_populate()?
> >>
> >> No, see the comment ahead of the outer if(). populate() needs calling
> >> ahead of updating the pointer.
> > 
> > I'm afraid I don't obviously see why evtchn_pending_sel can't be set
> > before v->vcpu_info is updated.  It will end up being ~0 anyway,
> > regardless of the order of operations, and we do force an event
> > channel injection.  There's something I'm clearly missing.
> 
> Considering the target vCPU is paused (or is current), doing so may be
> possible (albeit I fear I'm overlooking something). But re-ordering of
> operations shouldn't be done as a side effect of this change; if the
> original ordering constraints were too strict, then imo relaxing should
> either be a separate earlier change, or a separate follow-on one.

Let's do a followup then.  This will also need an RB instead of an
Ack:

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.