[PATCH v2 0/4] x86: shim building adjustments (plus shadow follow-on)

Posted by Jan Beulich 3 years, 7 months ago
The first change simply addresses a build issue noticed by
Andrew. The build breakage goes beyond this specific combination
though - PV_SHIM_EXCLUSIVE plus HVM is similarly an issue. This
is what the third patch tries to take care of, in a shape already
noted on IRC to be controversial. I'm submitting the change
nevertheless because, for the moment, there looks to be a majority
in favor of going this route. One argument not voiced there yet:
what good does it do to allow a user to enable HVM when they still
can't run HVM guests on the resulting hypervisor (it still being a
dedicated PV shim one)? On top of this, the alternative approach
is likely going to get ugly.

1: fix build with PV_SHIM_EXCLUSIVE and SHADOW_PAGING
2: adjust Kconfig defaults
3: don't permit HVM and PV_SHIM_EXCLUSIVE at the same time
4: refactor shadow_vram_{get,put}_l1e()

Jan

[PATCH v2 1/4] x86/shim: fix build with PV_SHIM_EXCLUSIVE and SHADOW_PAGING
Posted by Jan Beulich 3 years, 7 months ago
While there's little point in enabling both, the combination ought to at
least build correctly. Drop the direct PV_SHIM_EXCLUSIVE conditionals
and instead zap PG_log_dirty to zero under the right conditions, and key
other #ifdef-s off of that.

While there, also expand on ded576ce07e9 ("x86/shadow: dirty VRAM
tracking is needed for HVM only"): there was yet another is_hvm_domain()
check missing, and code touching the struct fields needs to be guarded
by suitable #ifdef-s as well. Also guard the shadow-mode-only fields
accordingly.

Fixes: 8b5b49ceb3d9 ("x86: don't include domctl and alike in shim-exclusive builds")
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
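
As a minimal sketch of the pattern used below (illustrative only, with
the surrounding context trimmed): PG_log_dirty is always defined, but
collapses to the constant 0 in a shim-exclusive, !HVM build, so
"#if PG_log_dirty" can replace the direct CONFIG_PV_SHIM_EXCLUSIVE
guards:

    /*
     * Sketch (context trimmed): PG_log_dirty doubles as a preprocessor
     * guard and as a run-time mode bit that folds away when zero.
     */
    #if defined(CONFIG_HVM) || !defined(CONFIG_PV_SHIM_EXCLUSIVE)
    #define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)
    #else
    #define PG_log_dirty   0
    #endif

    #if PG_log_dirty
    /* ... real log-dirty implementation and declarations ... */
    #else
    static inline void paging_mark_dirty(struct domain *d, mfn_t gmfn) {}
    #endif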

--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -47,7 +47,7 @@
 /* Per-CPU variable for enforcing the lock ordering */
 DEFINE_PER_CPU(int, mm_lock_level);
 
-#ifndef CONFIG_PV_SHIM_EXCLUSIVE
+#if PG_log_dirty
 
 /************************************************/
 /*              LOG DIRTY SUPPORT               */
@@ -630,7 +630,7 @@ void paging_log_dirty_init(struct domain
     d->arch.paging.log_dirty.ops = ops;
 }
 
-#endif /* CONFIG_PV_SHIM_EXCLUSIVE */
+#endif /* PG_log_dirty */
 
 /************************************************/
 /*           CODE FOR PAGING SUPPORT            */
@@ -671,7 +671,7 @@ void paging_vcpu_init(struct vcpu *v)
         shadow_vcpu_init(v);
 }
 
-#ifndef CONFIG_PV_SHIM_EXCLUSIVE
+#if PG_log_dirty
 int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                   XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                   bool_t resuming)
@@ -792,7 +792,7 @@ long paging_domctl_continuation(XEN_GUES
 
     return ret;
 }
-#endif /* CONFIG_PV_SHIM_EXCLUSIVE */
+#endif /* PG_log_dirty */
 
 /* Call when destroying a domain */
 int paging_teardown(struct domain *d)
@@ -808,7 +808,7 @@ int paging_teardown(struct domain *d)
     if ( preempted )
         return -ERESTART;
 
-#ifndef CONFIG_PV_SHIM_EXCLUSIVE
+#if PG_log_dirty
     /* clean up log dirty resources. */
     rc = paging_free_log_dirty_bitmap(d, 0);
     if ( rc == -ERESTART )
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -2869,12 +2869,14 @@ void shadow_teardown(struct domain *d, b
      * calls now that we've torn down the bitmap */
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    if ( d->arch.hvm.dirty_vram )
+#ifdef CONFIG_HVM
+    if ( is_hvm_domain(d) && d->arch.hvm.dirty_vram )
     {
         xfree(d->arch.hvm.dirty_vram->sl1ma);
         xfree(d->arch.hvm.dirty_vram->dirty_bitmap);
         XFREE(d->arch.hvm.dirty_vram);
     }
+#endif
 
 out:
     paging_unlock(d);
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -618,6 +618,7 @@ _sh_propagate(struct vcpu *v,
         }
     }
 
+#ifdef CONFIG_HVM
     if ( unlikely(level == 1) && is_hvm_domain(d) )
     {
         struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
@@ -632,6 +633,7 @@ _sh_propagate(struct vcpu *v,
                 sflags &= ~_PAGE_RW;
         }
     }
+#endif
 
     /* Read-only memory */
     if ( p2m_is_readonly(p2mt) )
@@ -1050,6 +1052,7 @@ static inline void shadow_vram_get_l1e(s
                                        mfn_t sl1mfn,
                                        struct domain *d)
 {
+#ifdef CONFIG_HVM
     mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
     int flags = shadow_l1e_get_flags(new_sl1e);
     unsigned long gfn;
@@ -1074,6 +1077,7 @@ static inline void shadow_vram_get_l1e(s
             dirty_vram->sl1ma[i] = mfn_to_maddr(sl1mfn)
                 | ((unsigned long)sl1e & ~PAGE_MASK);
     }
+#endif
 }
 
 static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
@@ -1081,6 +1085,7 @@ static inline void shadow_vram_put_l1e(s
                                        mfn_t sl1mfn,
                                        struct domain *d)
 {
+#ifdef CONFIG_HVM
     mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
     int flags = shadow_l1e_get_flags(old_sl1e);
     unsigned long gfn;
@@ -1140,6 +1145,7 @@ static inline void shadow_vram_put_l1e(s
             dirty_vram->last_dirty = NOW();
         }
     }
+#endif
 }
 
 static int shadow_set_l1e(struct domain *d,
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -67,8 +67,12 @@
 #define PG_translate   0
 #define PG_external    0
 #endif
+#if defined(CONFIG_HVM) || !defined(CONFIG_PV_SHIM_EXCLUSIVE)
 /* Enable log dirty mode */
 #define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)
+#else
+#define PG_log_dirty   0
+#endif
 
 /* All paging modes. */
 #define PG_MASK (PG_refcounts | PG_log_dirty | PG_translate | PG_external)
@@ -154,7 +158,7 @@ struct paging_mode {
 /*****************************************************************************
  * Log dirty code */
 
-#ifndef CONFIG_PV_SHIM_EXCLUSIVE
+#if PG_log_dirty
 
 /* get the dirty bitmap for a specific range of pfns */
 void paging_log_dirty_range(struct domain *d,
@@ -195,23 +199,28 @@ int paging_mfn_is_dirty(struct domain *d
 #define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
                               (LOGDIRTY_NODE_ENTRIES-1))
 
+#ifdef CONFIG_HVM
 /* VRAM dirty tracking support */
 struct sh_dirty_vram {
     unsigned long begin_pfn;
     unsigned long end_pfn;
+#ifdef CONFIG_SHADOW_PAGING
     paddr_t *sl1ma;
     uint8_t *dirty_bitmap;
     s_time_t last_dirty;
+#endif
 };
+#endif
 
-#else /* !CONFIG_PV_SHIM_EXCLUSIVE */
+#else /* !PG_log_dirty */
 
 static inline void paging_log_dirty_init(struct domain *d,
                                          const struct log_dirty_ops *ops) {}
 static inline void paging_mark_dirty(struct domain *d, mfn_t gmfn) {}
 static inline void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn) {}
+static inline bool paging_mfn_is_dirty(struct domain *d, mfn_t gmfn) { return false; }
 
-#endif /* CONFIG_PV_SHIM_EXCLUSIVE */
+#endif /* PG_log_dirty */
 
 /*****************************************************************************
  * Entry points into the paging-assistance code */

[PATCH v2 2/4] x86/shim: adjust Kconfig defaults
Posted by Jan Beulich 3 years, 7 months ago
Just like HVM, defaulting SHADOW_PAGING and TBOOT to Yes in shim-
exclusive mode makes no sense, as the respective code is dead there.

Also adjust the shim default config file: it needs to specify values
only for settings where a non-default value is wanted.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
---
v2: Use simple default expression where possible.

--- a/xen/arch/x86/Kconfig
+++ b/xen/arch/x86/Kconfig
@@ -116,9 +116,9 @@ config XEN_SHSTK
 	  compatiblity can be provided via the PV Shim mechanism.
 
 config SHADOW_PAGING
-        bool "Shadow Paging"
-        default y
-        ---help---
+	bool "Shadow Paging"
+	default !PV_SHIM_EXCLUSIVE
+	---help---
 
           Shadow paging is a software alternative to hardware paging support
           (Intel EPT, AMD NPT).
@@ -165,8 +165,8 @@ config HVM_FEP
 	  If unsure, say N.
 
 config TBOOT
-	def_bool y
-	prompt "Xen tboot support" if EXPERT
+	bool "Xen tboot support" if EXPERT
+	default y if !PV_SHIM_EXCLUSIVE
 	select CRYPTO
 	---help---
 	  Allows support for Trusted Boot using the Intel(R) Trusted Execution
--- a/xen/arch/x86/configs/pvshim_defconfig
+++ b/xen/arch/x86/configs/pvshim_defconfig
@@ -8,12 +8,9 @@ CONFIG_NR_CPUS=32
 CONFIG_EXPERT=y
 CONFIG_SCHED_NULL=y
 # Disable features not used by the PV shim
-# CONFIG_HVM is not set
 # CONFIG_XEN_SHSTK is not set
 # CONFIG_HYPFS is not set
-# CONFIG_SHADOW_PAGING is not set
 # CONFIG_BIGMEM is not set
-# CONFIG_TBOOT is not set
 # CONFIG_KEXEC is not set
 # CONFIG_XENOPROF is not set
 # CONFIG_XSM is not set


[PATCH v2 3/4] x86/shim: don't permit HVM and PV_SHIM_EXCLUSIVE at the same time
Posted by Jan Beulich 3 years, 7 months ago
This combination doesn't really make sense (and there likely are more);
in particular even if the code built with both options set, HVM guests
wouldn't work (and I think one wouldn't be able to create one in the
first place). The alternative here would be some presumably intrusive
#ifdef-ary to get this combination to actually build (but still not
work) again.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Restore lost default setting.

--- a/xen/arch/x86/Kconfig
+++ b/xen/arch/x86/Kconfig
@@ -23,7 +23,7 @@ config X86
 	select HAS_PDX
 	select HAS_SCHED_GRANULARITY
 	select HAS_UBSAN
-	select HAS_VPCI if !PV_SHIM_EXCLUSIVE && HVM
+	select HAS_VPCI if HVM
 	select NEEDS_LIBELF
 	select NUMA
 
@@ -90,8 +90,9 @@ config PV_LINEAR_PT
          If unsure, say Y.
 
 config HVM
-	def_bool !PV_SHIM_EXCLUSIVE
-	prompt "HVM support"
+	bool "HVM support"
+	depends on !PV_SHIM_EXCLUSIVE
+	default y
 	---help---
 	  Interfaces to support HVM domains.  HVM domains require hardware
 	  virtualisation extensions (e.g. Intel VT-x, AMD SVM), but can boot


Re: [PATCH v2 3/4] x86/shim: don't permit HVM and PV_SHIM_EXCLUSIVE at the same time
Posted by Roger Pau Monné 3 years, 6 months ago
On Wed, Sep 16, 2020 at 03:08:00PM +0200, Jan Beulich wrote:
> This combination doesn't really make sense (and there likely are more);
> in particular even if the code built with both options set, HVM guests
> wouldn't work (and I think one wouldn't be able to create one in the
> first place). The alternative here would be some presumably intrusive
> #ifdef-ary to get this combination to actually build (but still not
> work) again.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

I can see the desire for being able to remove code, and the point
Andrew made about one option not making another disappear in a
completely different menu section.

Yet I don't see how to converge the two together, unless we completely
change our menu layouts, and even then I'm not sure I see how we could
structure this. Hence:

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.

Re: [PATCH v2 3/4] x86/shim: don't permit HVM and PV_SHIM_EXCLUSIVE at the same time
Posted by Jan Beulich 3 years, 6 months ago
On 08.10.2020 16:52, Roger Pau Monné wrote:
> On Wed, Sep 16, 2020 at 03:08:00PM +0200, Jan Beulich wrote:
>> This combination doesn't really make sense (and there likely are more);
>> in particular even if the code built with both options set, HVM guests
>> wouldn't work (and I think one wouldn't be able to create one in the
>> first place). The alternative here would be some presumably intrusive
>> #ifdef-ary to get this combination to actually build (but still not
>> work) again.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> I can see the desire for being able to remove code, and the point
> Andrew made about one option not making another disappear in a
> completely different menu section.
> 
> Yet I don't see how to converge the two together, unless we completely
> change our menu layouts, and even then I'm not sure I see how we could
> structure this. Hence:
> 
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks.

Andrew - are you okay with this going in then? Or if not, do you have
any thoughts towards an alternative approach?

Jan

[PATCH v2 4/4] x86/shadow: refactor shadow_vram_{get,put}_l1e()
Posted by Jan Beulich 3 years, 7 months ago
By passing the functions an MFN and flags, only a single instance of
each is needed; they were pretty large for being inline functions
anyway.

While moving the code, also adjust coding style and add const where
sensible / possible.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.

--- a/xen/arch/x86/mm/shadow/hvm.c
+++ b/xen/arch/x86/mm/shadow/hvm.c
@@ -903,6 +903,104 @@ int shadow_track_dirty_vram(struct domai
     return rc;
 }
 
+void shadow_vram_get_mfn(mfn_t mfn, unsigned int l1f,
+                         mfn_t sl1mfn, const void *sl1e,
+                         const struct domain *d)
+{
+    unsigned long gfn;
+    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
+
+    ASSERT(is_hvm_domain(d));
+
+    if ( !dirty_vram /* tracking disabled? */ ||
+         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
+         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
+        return;
+
+    gfn = gfn_x(mfn_to_gfn(d, mfn));
+    /* Page sharing not supported on shadow PTs */
+    BUG_ON(SHARED_M2P(gfn));
+
+    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    {
+        unsigned long i = gfn - dirty_vram->begin_pfn;
+        const struct page_info *page = mfn_to_page(mfn);
+
+        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
+            /* Initial guest reference, record it */
+            dirty_vram->sl1ma[i] = mfn_to_maddr(sl1mfn) |
+                                   PAGE_OFFSET(sl1e);
+    }
+}
+
+void shadow_vram_put_mfn(mfn_t mfn, unsigned int l1f,
+                         mfn_t sl1mfn, const void *sl1e,
+                         const struct domain *d)
+{
+    unsigned long gfn;
+    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
+
+    ASSERT(is_hvm_domain(d));
+
+    if ( !dirty_vram /* tracking disabled? */ ||
+         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
+         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
+        return;
+
+    gfn = gfn_x(mfn_to_gfn(d, mfn));
+    /* Page sharing not supported on shadow PTs */
+    BUG_ON(SHARED_M2P(gfn));
+
+    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
+    {
+        unsigned long i = gfn - dirty_vram->begin_pfn;
+        const struct page_info *page = mfn_to_page(mfn);
+        bool dirty = false;
+        paddr_t sl1ma = mfn_to_maddr(sl1mfn) | PAGE_OFFSET(sl1e);
+
+        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
+        {
+            /* Last reference */
+            if ( dirty_vram->sl1ma[i] == INVALID_PADDR )
+            {
+                /* We didn't know it was that one, let's say it is dirty */
+                dirty = true;
+            }
+            else
+            {
+                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
+                dirty_vram->sl1ma[i] = INVALID_PADDR;
+                if ( l1f & _PAGE_DIRTY )
+                    dirty = true;
+            }
+        }
+        else
+        {
+            /* We had more than one reference, just consider the page dirty. */
+            dirty = true;
+            /* Check that it's not the one we recorded. */
+            if ( dirty_vram->sl1ma[i] == sl1ma )
+            {
+                /* Too bad, we remembered the wrong one... */
+                dirty_vram->sl1ma[i] = INVALID_PADDR;
+            }
+            else
+            {
+                /*
+                 * Ok, our recorded sl1e is still pointing to this page, let's
+                 * just hope it will remain.
+                 */
+            }
+        }
+
+        if ( dirty )
+        {
+            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+            dirty_vram->last_dirty = NOW();
+        }
+    }
+}
+
 /*
  * Local variables:
  * mode: C
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -1047,107 +1047,6 @@ static int shadow_set_l2e(struct domain
     return flags;
 }
 
-static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-#ifdef CONFIG_HVM
-    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
-    int flags = shadow_l1e_get_flags(new_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
-
-    if ( !is_hvm_domain(d) || !dirty_vram /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = gfn_x(mfn_to_gfn(d, mfn));
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
-    {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-            /* Initial guest reference, record it */
-            dirty_vram->sl1ma[i] = mfn_to_maddr(sl1mfn)
-                | ((unsigned long)sl1e & ~PAGE_MASK);
-    }
-#endif
-}
-
-static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
-                                       shadow_l1e_t *sl1e,
-                                       mfn_t sl1mfn,
-                                       struct domain *d)
-{
-#ifdef CONFIG_HVM
-    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
-    int flags = shadow_l1e_get_flags(old_sl1e);
-    unsigned long gfn;
-    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
-
-    if ( !is_hvm_domain(d) || !dirty_vram /* tracking disabled? */
-         || !(flags & _PAGE_RW) /* read-only mapping? */
-         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
-        return;
-
-    gfn = gfn_x(mfn_to_gfn(d, mfn));
-    /* Page sharing not supported on shadow PTs */
-    BUG_ON(SHARED_M2P(gfn));
-
-    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
-    {
-        unsigned long i = gfn - dirty_vram->begin_pfn;
-        struct page_info *page = mfn_to_page(mfn);
-        int dirty = 0;
-        paddr_t sl1ma = mfn_to_maddr(sl1mfn)
-            | ((unsigned long)sl1e & ~PAGE_MASK);
-
-        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
-        {
-            /* Last reference */
-            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
-                /* We didn't know it was that one, let's say it is dirty */
-                dirty = 1;
-            }
-            else
-            {
-                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-                if ( flags & _PAGE_DIRTY )
-                    dirty = 1;
-            }
-        }
-        else
-        {
-            /* We had more than one reference, just consider the page dirty. */
-            dirty = 1;
-            /* Check that it's not the one we recorded. */
-            if ( dirty_vram->sl1ma[i] == sl1ma )
-            {
-                /* Too bad, we remembered the wrong one... */
-                dirty_vram->sl1ma[i] = INVALID_PADDR;
-            }
-            else
-            {
-                /* Ok, our recorded sl1e is still pointing to this page, let's
-                 * just hope it will remain. */
-            }
-        }
-        if ( dirty )
-        {
-            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
-            dirty_vram->last_dirty = NOW();
-        }
-    }
-#endif
-}
-
 static int shadow_set_l1e(struct domain *d,
                           shadow_l1e_t *sl1e,
                           shadow_l1e_t new_sl1e,
@@ -1156,6 +1055,7 @@ static int shadow_set_l1e(struct domain
 {
     int flags = 0;
     shadow_l1e_t old_sl1e;
+    unsigned int old_sl1f;
 #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
     mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
 #endif
@@ -1194,7 +1094,9 @@ static int shadow_set_l1e(struct domain
                 new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc);
                 /* fall through */
             case 0:
-                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
+                shadow_vram_get_mfn(shadow_l1e_get_mfn(new_sl1e),
+                                    shadow_l1e_get_flags(new_sl1e),
+                                    sl1mfn, sl1e, d);
                 break;
             }
 #undef PAGE_FLIPPABLE
@@ -1205,20 +1107,19 @@ static int shadow_set_l1e(struct domain
     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
     flags |= SHADOW_SET_CHANGED;
 
-    if ( (shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT)
-         && !sh_l1e_is_magic(old_sl1e) )
+    old_sl1f = shadow_l1e_get_flags(old_sl1e);
+    if ( (old_sl1f & _PAGE_PRESENT) && !sh_l1e_is_magic(old_sl1e) &&
+         shadow_mode_refcounts(d) )
     {
         /* We lost a reference to an old mfn. */
         /* N.B. Unlike higher-level sets, never need an extra flush
          * when writing an l1e.  Because it points to the same guest frame
          * as the guest l1e did, it's the guest's responsibility to
          * trigger a flush later. */
-        if ( shadow_mode_refcounts(d) )
-        {
-            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
-            shadow_put_page_from_l1e(old_sl1e, d);
-            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
-        }
+        shadow_vram_put_mfn(shadow_l1e_get_mfn(old_sl1e), old_sl1f,
+                            sl1mfn, sl1e, d);
+        shadow_put_page_from_l1e(old_sl1e, d);
+        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
     }
     return flags;
 }
@@ -1944,9 +1845,12 @@ void sh_destroy_l1_shadow(struct domain
         /* Decrement refcounts of all the old entries */
         mfn_t sl1mfn = smfn;
         SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
-            if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
-                 && !sh_l1e_is_magic(*sl1e) ) {
-                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
+            unsigned int sl1f = shadow_l1e_get_flags(*sl1e);
+
+            if ( (sl1f & _PAGE_PRESENT) && !sh_l1e_is_magic(*sl1e) )
+            {
+                shadow_vram_put_mfn(shadow_l1e_get_mfn(*sl1e), sl1f,
+                                    sl1mfn, sl1e, d);
                 shadow_put_page_from_l1e(*sl1e, d);
             }
         });
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -410,6 +410,14 @@ void shadow_update_paging_modes(struct v
  * With user_only == 1, unhooks only the user-mode mappings. */
 void shadow_unhook_mappings(struct domain *d, mfn_t smfn, int user_only);
 
+/* VRAM dirty tracking helpers. */
+void shadow_vram_get_mfn(mfn_t mfn, unsigned int l1f,
+                         mfn_t sl1mfn, const void *sl1e,
+                         const struct domain *d);
+void shadow_vram_put_mfn(mfn_t mfn, unsigned int l1f,
+                         mfn_t sl1mfn, const void *sl1e,
+                         const struct domain *d);
+
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
 /* Allow a shadowed page to go out of sync */
 int sh_unsync(struct vcpu *v, mfn_t gmfn);


Re: [PATCH v2 4/4] x86/shadow: refactor shadow_vram_{get,put}_l1e()
Posted by Roger Pau Monné 3 years, 6 months ago
On Wed, Sep 16, 2020 at 03:08:40PM +0200, Jan Beulich wrote:
> By passing the functions an MFN and flags, only a single instance of
                           ^ a
> each is needed; they were pretty large for being inline functions
> anyway.
> 
> While moving the code, also adjust coding style and add const where
> sensible / possible.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: New.
> 
> --- a/xen/arch/x86/mm/shadow/hvm.c
> +++ b/xen/arch/x86/mm/shadow/hvm.c
> @@ -903,6 +903,104 @@ int shadow_track_dirty_vram(struct domai
>      return rc;
>  }
>  
> +void shadow_vram_get_mfn(mfn_t mfn, unsigned int l1f,
> +                         mfn_t sl1mfn, const void *sl1e,
> +                         const struct domain *d)
> +{
> +    unsigned long gfn;
> +    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
> +
> +    ASSERT(is_hvm_domain(d));
> +
> +    if ( !dirty_vram /* tracking disabled? */ ||
> +         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
> +         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
> +        return;
> +
> +    gfn = gfn_x(mfn_to_gfn(d, mfn));
> +    /* Page sharing not supported on shadow PTs */
> +    BUG_ON(SHARED_M2P(gfn));
> +
> +    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> +    {
> +        unsigned long i = gfn - dirty_vram->begin_pfn;
> +        const struct page_info *page = mfn_to_page(mfn);
> +
> +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> +            /* Initial guest reference, record it */
> +            dirty_vram->sl1ma[i] = mfn_to_maddr(sl1mfn) |
> +                                   PAGE_OFFSET(sl1e);
> +    }
> +}
> +
> +void shadow_vram_put_mfn(mfn_t mfn, unsigned int l1f,
> +                         mfn_t sl1mfn, const void *sl1e,
> +                         const struct domain *d)
> +{
> +    unsigned long gfn;
> +    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
> +
> +    ASSERT(is_hvm_domain(d));
> +
> +    if ( !dirty_vram /* tracking disabled? */ ||
> +         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
> +         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
> +        return;
> +
> +    gfn = gfn_x(mfn_to_gfn(d, mfn));
> +    /* Page sharing not supported on shadow PTs */
> +    BUG_ON(SHARED_M2P(gfn));
> +
> +    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> +    {
> +        unsigned long i = gfn - dirty_vram->begin_pfn;
> +        const struct page_info *page = mfn_to_page(mfn);
> +        bool dirty = false;
> +        paddr_t sl1ma = mfn_to_maddr(sl1mfn) | PAGE_OFFSET(sl1e);
> +
> +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> +        {
> +            /* Last reference */
> +            if ( dirty_vram->sl1ma[i] == INVALID_PADDR )
> +            {
> +                /* We didn't know it was that one, let's say it is dirty */
> +                dirty = true;
> +            }
> +            else
> +            {
> +                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
> +                if ( l1f & _PAGE_DIRTY )
> +                    dirty = true;
> +            }
> +        }
> +        else
> +        {
> +            /* We had more than one reference, just consider the page dirty. */
> +            dirty = true;
> +            /* Check that it's not the one we recorded. */
> +            if ( dirty_vram->sl1ma[i] == sl1ma )
> +            {
> +                /* Too bad, we remembered the wrong one... */
> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
> +            }
> +            else
> +            {
> +                /*
> +                 * Ok, our recorded sl1e is still pointing to this page, let's
> +                 * just hope it will remain.
> +                 */
> +            }
> +        }
> +
> +        if ( dirty )
> +        {
> +            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);

Could you use _set_bit here?

> +            dirty_vram->last_dirty = NOW();
> +        }
> +    }
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -1047,107 +1047,6 @@ static int shadow_set_l2e(struct domain
>      return flags;
>  }
>  
> -static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
> -                                       shadow_l1e_t *sl1e,
> -                                       mfn_t sl1mfn,
> -                                       struct domain *d)
> -{
> -#ifdef CONFIG_HVM
> -    mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
> -    int flags = shadow_l1e_get_flags(new_sl1e);
> -    unsigned long gfn;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
> -
> -    if ( !is_hvm_domain(d) || !dirty_vram /* tracking disabled? */
> -         || !(flags & _PAGE_RW) /* read-only mapping? */
> -         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
> -        return;
> -
> -    gfn = gfn_x(mfn_to_gfn(d, mfn));
> -    /* Page sharing not supported on shadow PTs */
> -    BUG_ON(SHARED_M2P(gfn));
> -
> -    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> -    {
> -        unsigned long i = gfn - dirty_vram->begin_pfn;
> -        struct page_info *page = mfn_to_page(mfn);
> -
> -        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> -            /* Initial guest reference, record it */
> -            dirty_vram->sl1ma[i] = mfn_to_maddr(sl1mfn)
> -                | ((unsigned long)sl1e & ~PAGE_MASK);
> -    }
> -#endif
> -}
> -
> -static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
> -                                       shadow_l1e_t *sl1e,
> -                                       mfn_t sl1mfn,
> -                                       struct domain *d)
> -{
> -#ifdef CONFIG_HVM
> -    mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
> -    int flags = shadow_l1e_get_flags(old_sl1e);
> -    unsigned long gfn;
> -    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
> -
> -    if ( !is_hvm_domain(d) || !dirty_vram /* tracking disabled? */
> -         || !(flags & _PAGE_RW) /* read-only mapping? */
> -         || !mfn_valid(mfn) )   /* mfn can be invalid in mmio_direct */
> -        return;
> -
> -    gfn = gfn_x(mfn_to_gfn(d, mfn));
> -    /* Page sharing not supported on shadow PTs */
> -    BUG_ON(SHARED_M2P(gfn));
> -
> -    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> -    {
> -        unsigned long i = gfn - dirty_vram->begin_pfn;
> -        struct page_info *page = mfn_to_page(mfn);
> -        int dirty = 0;
> -        paddr_t sl1ma = mfn_to_maddr(sl1mfn)
> -            | ((unsigned long)sl1e & ~PAGE_MASK);
> -
> -        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> -        {
> -            /* Last reference */
> -            if ( dirty_vram->sl1ma[i] == INVALID_PADDR ) {
> -                /* We didn't know it was that one, let's say it is dirty */
> -                dirty = 1;
> -            }
> -            else
> -            {
> -                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
> -                dirty_vram->sl1ma[i] = INVALID_PADDR;
> -                if ( flags & _PAGE_DIRTY )
> -                    dirty = 1;
> -            }
> -        }
> -        else
> -        {
> -            /* We had more than one reference, just consider the page dirty. */
> -            dirty = 1;
> -            /* Check that it's not the one we recorded. */
> -            if ( dirty_vram->sl1ma[i] == sl1ma )
> -            {
> -                /* Too bad, we remembered the wrong one... */
> -                dirty_vram->sl1ma[i] = INVALID_PADDR;
> -            }
> -            else
> -            {
> -                /* Ok, our recorded sl1e is still pointing to this page, let's
> -                 * just hope it will remain. */
> -            }
> -        }
> -        if ( dirty )
> -        {
> -            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> -            dirty_vram->last_dirty = NOW();
> -        }
> -    }
> -#endif
> -}
> -
>  static int shadow_set_l1e(struct domain *d,
>                            shadow_l1e_t *sl1e,
>                            shadow_l1e_t new_sl1e,
> @@ -1156,6 +1055,7 @@ static int shadow_set_l1e(struct domain
>  {
>      int flags = 0;
>      shadow_l1e_t old_sl1e;
> +    unsigned int old_sl1f;
>  #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
>      mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
>  #endif
> @@ -1194,7 +1094,9 @@ static int shadow_set_l1e(struct domain
>                  new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc);
>                  /* fall through */
>              case 0:
> -                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
> +                shadow_vram_get_mfn(shadow_l1e_get_mfn(new_sl1e),
> +                                    shadow_l1e_get_flags(new_sl1e),
> +                                    sl1mfn, sl1e, d);

As you have moved this function into a HVM build time file, don't you
need to guard this call, or alternatively provide a dummy handler for
!CONFIG_HVM in private.h?

Same for shadow_vram_put_mfn.

Thanks, Roger.

Re: [PATCH v2 4/4] x86/shadow: refactor shadow_vram_{get,put}_l1e()
Posted by Jan Beulich 3 years, 6 months ago
On 08.10.2020 17:15, Roger Pau Monné wrote:
> On Wed, Sep 16, 2020 at 03:08:40PM +0200, Jan Beulich wrote:
>> +void shadow_vram_put_mfn(mfn_t mfn, unsigned int l1f,
>> +                         mfn_t sl1mfn, const void *sl1e,
>> +                         const struct domain *d)
>> +{
>> +    unsigned long gfn;
>> +    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
>> +
>> +    ASSERT(is_hvm_domain(d));
>> +
>> +    if ( !dirty_vram /* tracking disabled? */ ||
>> +         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
>> +         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
>> +        return;
>> +
>> +    gfn = gfn_x(mfn_to_gfn(d, mfn));
>> +    /* Page sharing not supported on shadow PTs */
>> +    BUG_ON(SHARED_M2P(gfn));
>> +
>> +    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
>> +    {
>> +        unsigned long i = gfn - dirty_vram->begin_pfn;
>> +        const struct page_info *page = mfn_to_page(mfn);
>> +        bool dirty = false;
>> +        paddr_t sl1ma = mfn_to_maddr(sl1mfn) | PAGE_OFFSET(sl1e);
>> +
>> +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
>> +        {
>> +            /* Last reference */
>> +            if ( dirty_vram->sl1ma[i] == INVALID_PADDR )
>> +            {
>> +                /* We didn't know it was that one, let's say it is dirty */
>> +                dirty = true;
>> +            }
>> +            else
>> +            {
>> +                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
>> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
>> +                if ( l1f & _PAGE_DIRTY )
>> +                    dirty = true;
>> +            }
>> +        }
>> +        else
>> +        {
>> +            /* We had more than one reference, just consider the page dirty. */
>> +            dirty = true;
>> +            /* Check that it's not the one we recorded. */
>> +            if ( dirty_vram->sl1ma[i] == sl1ma )
>> +            {
>> +                /* Too bad, we remembered the wrong one... */
>> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
>> +            }
>> +            else
>> +            {
>> +                /*
>> +                 * Ok, our recorded sl1e is still pointing to this page, let's
>> +                 * just hope it will remain.
>> +                 */
>> +            }
>> +        }
>> +
>> +        if ( dirty )
>> +        {
>> +            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> 
> Could you use _set_bit here?

In addition to what Andrew has said - this would be a non cosmetic
change, which I wouldn't want to do in a patch merely moving this
code.

>> @@ -1194,7 +1094,9 @@ static int shadow_set_l1e(struct domain
>>                  new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc);
>>                  /* fall through */
>>              case 0:
>> -                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
>> +                shadow_vram_get_mfn(shadow_l1e_get_mfn(new_sl1e),
>> +                                    shadow_l1e_get_flags(new_sl1e),
>> +                                    sl1mfn, sl1e, d);
> 
> As you have moved this function into a HVM build time file, don't you
> need to guard this call, or alternatively provide a dummy handler for
> !CONFIG_HVM in private.h?
> 
> Same for shadow_vram_put_mfn.

All uses are inside conditionals using shadow_mode_refcounts(), i.e.
the compiler's DCE pass will eliminate the calls. All we need are
declarations to be in scope.
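
As a minimal sketch of what this relies on (hypothetical names, not the
real Xen helpers): when the guard folds to a compile-time 0, the call in
the dead branch is never emitted, so a declaration in scope is enough
and no !CONFIG_HVM stub is required:

    #define mode_refcounts() 0          /* what the real predicate folds to here */

    void put_mfn(unsigned long mfn);    /* declaration only - intentionally undefined */

    void drop_entry(unsigned long mfn)
    {
        if ( mode_refcounts() )         /* constant-false in this configuration */
            put_mfn(mfn);               /* dead code: at the optimisation levels Xen
                                         * builds with, the call is eliminated and the
                                         * missing definition never causes a link error */
    }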

Jan

Re: [PATCH v2 4/4] x86/shadow: refactor shadow_vram_{get,put}_l1e()
Posted by Andrew Cooper 3 years, 6 months ago
On 08/10/2020 16:15, Roger Pau Monné wrote:
> On Wed, Sep 16, 2020 at 03:08:40PM +0200, Jan Beulich wrote:
>> By passing the functions an MFN and flags, only a single instance of
>                            ^ a

'an' is correct.

an MFN
a Machine Frame Number

because the pronunciation changes.  "an" precedes anything with a vowel
sound, not just vowels themselves.  (Isn't English great...)

>> each is needed; they were pretty large for being inline functions
>> anyway.
>>
>> While moving the code, also adjust coding style and add const where
>> sensible / possible.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> v2: New.
>>
>> --- a/xen/arch/x86/mm/shadow/hvm.c
>> +++ b/xen/arch/x86/mm/shadow/hvm.c
>> @@ -903,6 +903,104 @@ int shadow_track_dirty_vram(struct domai
>>      return rc;
>>  }
>>  
>> +void shadow_vram_get_mfn(mfn_t mfn, unsigned int l1f,
>> +                         mfn_t sl1mfn, const void *sl1e,
>> +                         const struct domain *d)
>> +{
>> +    unsigned long gfn;
>> +    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
>> +
>> +    ASSERT(is_hvm_domain(d));
>> +
>> +    if ( !dirty_vram /* tracking disabled? */ ||
>> +         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
>> +         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
>> +        return;
>> +
>> +    gfn = gfn_x(mfn_to_gfn(d, mfn));
>> +    /* Page sharing not supported on shadow PTs */
>> +    BUG_ON(SHARED_M2P(gfn));
>> +
>> +    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
>> +    {
>> +        unsigned long i = gfn - dirty_vram->begin_pfn;
>> +        const struct page_info *page = mfn_to_page(mfn);
>> +
>> +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
>> +            /* Initial guest reference, record it */
>> +            dirty_vram->sl1ma[i] = mfn_to_maddr(sl1mfn) |
>> +                                   PAGE_OFFSET(sl1e);
>> +    }
>> +}
>> +
>> +void shadow_vram_put_mfn(mfn_t mfn, unsigned int l1f,
>> +                         mfn_t sl1mfn, const void *sl1e,
>> +                         const struct domain *d)
>> +{
>> +    unsigned long gfn;
>> +    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
>> +
>> +    ASSERT(is_hvm_domain(d));
>> +
>> +    if ( !dirty_vram /* tracking disabled? */ ||
>> +         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
>> +         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
>> +        return;
>> +
>> +    gfn = gfn_x(mfn_to_gfn(d, mfn));
>> +    /* Page sharing not supported on shadow PTs */
>> +    BUG_ON(SHARED_M2P(gfn));
>> +
>> +    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
>> +    {
>> +        unsigned long i = gfn - dirty_vram->begin_pfn;
>> +        const struct page_info *page = mfn_to_page(mfn);
>> +        bool dirty = false;
>> +        paddr_t sl1ma = mfn_to_maddr(sl1mfn) | PAGE_OFFSET(sl1e);
>> +
>> +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
>> +        {
>> +            /* Last reference */
>> +            if ( dirty_vram->sl1ma[i] == INVALID_PADDR )
>> +            {
>> +                /* We didn't know it was that one, let's say it is dirty */
>> +                dirty = true;
>> +            }
>> +            else
>> +            {
>> +                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
>> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
>> +                if ( l1f & _PAGE_DIRTY )
>> +                    dirty = true;
>> +            }
>> +        }
>> +        else
>> +        {
>> +            /* We had more than one reference, just consider the page dirty. */
>> +            dirty = true;
>> +            /* Check that it's not the one we recorded. */
>> +            if ( dirty_vram->sl1ma[i] == sl1ma )
>> +            {
>> +                /* Too bad, we remembered the wrong one... */
>> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
>> +            }
>> +            else
>> +            {
>> +                /*
>> +                 * Ok, our recorded sl1e is still pointing to this page, let's
>> +                 * just hope it will remain.
>> +                 */
>> +            }
>> +        }
>> +
>> +        if ( dirty )
>> +        {
>> +            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> Could you use _set_bit here?

__set_bit() uses 4-byte accesses.  This uses 1-byte accesses.

Last I checked, there is a boundary issue at the end of the dirty_bitmap.

Both Julien and I have considered changing our bit infrastructure to use
byte accesses, which would make them more generally useful.

~Andrew

Re: [PATCH v2 4/4] x86/shadow: refactor shadow_vram_{get,put}_l1e()
Posted by Roger Pau Monné 3 years, 6 months ago
On Thu, Oct 08, 2020 at 04:36:47PM +0100, Andrew Cooper wrote:
> On 08/10/2020 16:15, Roger Pau Monné wrote:
> > On Wed, Sep 16, 2020 at 03:08:40PM +0200, Jan Beulich wrote:
> >> By passing the functions an MFN and flags, only a single instance of
> >                            ^ a
> 
> 'an' is correct.
> 
> an MFN
> a Machine Frame Number
> 
> because the pronunciation changes.  "an" precedes anything with a vowel
> sound, not just vowels themselves.  (Isn't English great...)

Oh great, I think I've been misspelling this myself for a long time.

> >> each is needed; they were pretty large for being inline functions
> >> anyway.
> >>
> >> While moving the code, also adjust coding style and add const where
> >> sensible / possible.
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> v2: New.
> >>
> >> --- a/xen/arch/x86/mm/shadow/hvm.c
> >> +++ b/xen/arch/x86/mm/shadow/hvm.c
> >> @@ -903,6 +903,104 @@ int shadow_track_dirty_vram(struct domai
> >>      return rc;
> >>  }
> >>  
> >> +void shadow_vram_get_mfn(mfn_t mfn, unsigned int l1f,
> >> +                         mfn_t sl1mfn, const void *sl1e,
> >> +                         const struct domain *d)
> >> +{
> >> +    unsigned long gfn;
> >> +    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
> >> +
> >> +    ASSERT(is_hvm_domain(d));
> >> +
> >> +    if ( !dirty_vram /* tracking disabled? */ ||
> >> +         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
> >> +         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
> >> +        return;
> >> +
> >> +    gfn = gfn_x(mfn_to_gfn(d, mfn));
> >> +    /* Page sharing not supported on shadow PTs */
> >> +    BUG_ON(SHARED_M2P(gfn));
> >> +
> >> +    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> >> +    {
> >> +        unsigned long i = gfn - dirty_vram->begin_pfn;
> >> +        const struct page_info *page = mfn_to_page(mfn);
> >> +
> >> +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> >> +            /* Initial guest reference, record it */
> >> +            dirty_vram->sl1ma[i] = mfn_to_maddr(sl1mfn) |
> >> +                                   PAGE_OFFSET(sl1e);
> >> +    }
> >> +}
> >> +
> >> +void shadow_vram_put_mfn(mfn_t mfn, unsigned int l1f,
> >> +                         mfn_t sl1mfn, const void *sl1e,
> >> +                         const struct domain *d)
> >> +{
> >> +    unsigned long gfn;
> >> +    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
> >> +
> >> +    ASSERT(is_hvm_domain(d));
> >> +
> >> +    if ( !dirty_vram /* tracking disabled? */ ||
> >> +         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
> >> +         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
> >> +        return;
> >> +
> >> +    gfn = gfn_x(mfn_to_gfn(d, mfn));
> >> +    /* Page sharing not supported on shadow PTs */
> >> +    BUG_ON(SHARED_M2P(gfn));
> >> +
> >> +    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
> >> +    {
> >> +        unsigned long i = gfn - dirty_vram->begin_pfn;
> >> +        const struct page_info *page = mfn_to_page(mfn);
> >> +        bool dirty = false;
> >> +        paddr_t sl1ma = mfn_to_maddr(sl1mfn) | PAGE_OFFSET(sl1e);
> >> +
> >> +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
> >> +        {
> >> +            /* Last reference */
> >> +            if ( dirty_vram->sl1ma[i] == INVALID_PADDR )
> >> +            {
> >> +                /* We didn't know it was that one, let's say it is dirty */
> >> +                dirty = true;
> >> +            }
> >> +            else
> >> +            {
> >> +                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
> >> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
> >> +                if ( l1f & _PAGE_DIRTY )
> >> +                    dirty = true;
> >> +            }
> >> +        }
> >> +        else
> >> +        {
> >> +            /* We had more than one reference, just consider the page dirty. */
> >> +            dirty = true;
> >> +            /* Check that it's not the one we recorded. */
> >> +            if ( dirty_vram->sl1ma[i] == sl1ma )
> >> +            {
> >> +                /* Too bad, we remembered the wrong one... */
> >> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
> >> +            }
> >> +            else
> >> +            {
> >> +                /*
> >> +                 * Ok, our recorded sl1e is still pointing to this page, let's
> >> +                 * just hope it will remain.
> >> +                 */
> >> +            }
> >> +        }
> >> +
> >> +        if ( dirty )
> >> +        {
> >> +            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
> > Could you use _set_bit here?
> 
> __set_bit() uses 4-byte accesses.  This uses 1-byte accesses.

Right, this is allocated using alloc directly, not the bitmap helper,
and the size is rounded to byte level, not unsigned int.

> Last I checked, there is a boundary issue at the end of the dirty_bitmap.
> 
> Both Julien and I have considered changing our bit infrastructure to use
> byte accesses, which would make them more generally useful.

Does indeed seem useful to me, as we could safely expand the usage of
the bitmap ops without risking introducing bugs.

Thanks, Roger.

Re: [PATCH v2 4/4] x86/shadow: refactor shadow_vram_{get,put}_l1e()
Posted by Jan Beulich 3 years, 6 months ago
On 10.10.2020 09:45, Roger Pau Monné wrote:
> On Thu, Oct 08, 2020 at 04:36:47PM +0100, Andrew Cooper wrote:
>> On 08/10/2020 16:15, Roger Pau Monné wrote:
>>> On Wed, Sep 16, 2020 at 03:08:40PM +0200, Jan Beulich wrote:
>>>> +void shadow_vram_put_mfn(mfn_t mfn, unsigned int l1f,
>>>> +                         mfn_t sl1mfn, const void *sl1e,
>>>> +                         const struct domain *d)
>>>> +{
>>>> +    unsigned long gfn;
>>>> +    struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
>>>> +
>>>> +    ASSERT(is_hvm_domain(d));
>>>> +
>>>> +    if ( !dirty_vram /* tracking disabled? */ ||
>>>> +         !(l1f & _PAGE_RW) /* read-only mapping? */ ||
>>>> +         !mfn_valid(mfn) /* mfn can be invalid in mmio_direct */)
>>>> +        return;
>>>> +
>>>> +    gfn = gfn_x(mfn_to_gfn(d, mfn));
>>>> +    /* Page sharing not supported on shadow PTs */
>>>> +    BUG_ON(SHARED_M2P(gfn));
>>>> +
>>>> +    if ( (gfn >= dirty_vram->begin_pfn) && (gfn < dirty_vram->end_pfn) )
>>>> +    {
>>>> +        unsigned long i = gfn - dirty_vram->begin_pfn;
>>>> +        const struct page_info *page = mfn_to_page(mfn);
>>>> +        bool dirty = false;
>>>> +        paddr_t sl1ma = mfn_to_maddr(sl1mfn) | PAGE_OFFSET(sl1e);
>>>> +
>>>> +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
>>>> +        {
>>>> +            /* Last reference */
>>>> +            if ( dirty_vram->sl1ma[i] == INVALID_PADDR )
>>>> +            {
>>>> +                /* We didn't know it was that one, let's say it is dirty */
>>>> +                dirty = true;
>>>> +            }
>>>> +            else
>>>> +            {
>>>> +                ASSERT(dirty_vram->sl1ma[i] == sl1ma);
>>>> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
>>>> +                if ( l1f & _PAGE_DIRTY )
>>>> +                    dirty = true;
>>>> +            }
>>>> +        }
>>>> +        else
>>>> +        {
>>>> +            /* We had more than one reference, just consider the page dirty. */
>>>> +            dirty = true;
>>>> +            /* Check that it's not the one we recorded. */
>>>> +            if ( dirty_vram->sl1ma[i] == sl1ma )
>>>> +            {
>>>> +                /* Too bad, we remembered the wrong one... */
>>>> +                dirty_vram->sl1ma[i] = INVALID_PADDR;
>>>> +            }
>>>> +            else
>>>> +            {
>>>> +                /*
>>>> +                 * Ok, our recorded sl1e is still pointing to this page, let's
>>>> +                 * just hope it will remain.
>>>> +                 */
>>>> +            }
>>>> +        }
>>>> +
>>>> +        if ( dirty )
>>>> +        {
>>>> +            dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
>>> Could you use _set_bit here?
>>
>> __set_bit() uses 4-byte accesses.  This uses 1-byte accesses.
> 
> Right, this is allocated using alloc directly, not the bitmap helper,
> and the size is rounded to byte level, not unsigned int.
> 
>> Last I checked, there is a boundary issue at the end of the dirty_bitmap.
>>
>> Both Julien and I have considered changing our bit infrastructure to use
>> byte accesses, which would make them more generally useful.
> 
> Does indeed seem useful to me, as we could safely expand the usage of
> the bitmap ops without risking introducing bugs.

Aren't there architectures which are handicapped when it comes to
sub-word accesses? At least common code had better not make assumptions
about more fine-grained accesses being available ...

As to x86, couldn't we make the macros evaluate alignof(*(addr)) and
choose byte-based accesses when it's less than 4?
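
Something along these lines, perhaps (purely illustrative sketch with a
made-up name, ignoring the locked/atomic variants and assuming a typed,
non-void pointer):

    #include <stdint.h>

    /* Pick the access width from the alignment of what addr points at. */
    #define set_bit_by_align(nr, addr)                                   \
        do {                                                             \
            if ( __alignof__(*(addr)) < 4 )                              \
                ((volatile uint8_t *)(addr))[(nr) / 8] |=                \
                    1U << ((nr) % 8);            /* byte-wise access */  \
            else                                                         \
                ((volatile uint32_t *)(addr))[(nr) / 32] |=              \
                    1U << ((nr) % 32);           /* 4-byte access */     \
        } while ( 0 )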

Jan