[PATCH v2 0/9] x86/p2m: hook adjustments

Jan Beulich posted 9 patches 3 weeks, 1 day ago
Only 0 patches received!

[PATCH v2 0/9] x86/p2m: hook adjustments

Posted by Jan Beulich 3 weeks, 1 day ago
This started out with me getting confused by the two write_p2m_entry()
hooks we have - there really ought to be no more than one, or if two
were absolutely needed they imo would better have distinct names. Other
adjustment opportunities (and I hope they're improvements) were found
while getting rid of that one unnecessary layer of indirect calls.

v2 has a build fix for clang in patch 3 and a few new patches.

1: p2m: paging_write_p2m_entry() is a private function
2: p2m: collapse the two ->write_p2m_entry() hooks
3: p2m: suppress audit_p2m hook when possible
4: HAP: move nested-P2M flush calculations out of locked region
5: p2m: split write_p2m_entry() hook
6: p2m: avoid unnecessary calls of write_p2m_entry_pre() hook
7: p2m: pass old PTE directly to write_p2m_entry_pre() hook
8: shadow: cosmetics to sh_unshadow_for_p2m_change()
9: shadow: adjust TLB flushing in sh_unshadow_for_p2m_change()

Jan

[PATCH v2 1/9] x86/p2m: paging_write_p2m_entry() is a private function

Posted by Jan Beulich 3 weeks, 1 day ago
As it gets installed by p2m_pt_init(), it doesn't need to live in
paging.c. The function working in terms of l1_pgentry_t even further
indicates its non-paging-generic nature. Move it and drop its
paging_ prefix, not adding any new one now that it's static.

This then also makes more obvious that in the EPT case we wouldn't
risk mistakenly calling through the NULL hook pointer.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -108,6 +108,31 @@ static unsigned long p2m_type_to_flags(c
     }
 }
 
+/*
+ * Atomically write a P2M entry and update the paging-assistance state
+ * appropriately.
+ * Arguments: the domain in question, the GFN whose mapping is being updated,
+ * a pointer to the entry to be written, the MFN in which the entry resides,
+ * the new contents of the entry, and the level in the p2m tree at which
+ * we are writing.
+ */
+static int write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
+                           l1_pgentry_t *p, l1_pgentry_t new,
+                           unsigned int level)
+{
+    struct domain *d = p2m->domain;
+    struct vcpu *v = current;
+    int rc = 0;
+
+    if ( v->domain != d )
+        v = d->vcpu ? d->vcpu[0] : NULL;
+    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v)) )
+        rc = paging_get_hostmode(v)->write_p2m_entry(p2m, gfn, p, new, level);
+    else
+        safe_write_pte(p, new);
+
+    return rc;
+}
 
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
@@ -594,7 +619,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
         entry_content.l1 = l3e_content.l3;
 
         rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3);
-        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+        /* NB: write_p2m_entry() handles tlb flushes properly */
         if ( rc )
             goto out;
     }
@@ -631,7 +656,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
 
         /* level 1 entry */
         rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1);
-        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+        /* NB: write_p2m_entry() handles tlb flushes properly */
         if ( rc )
             goto out;
     }
@@ -666,7 +691,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
         entry_content.l1 = l2e_content.l2;
 
         rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2);
-        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+        /* NB: write_p2m_entry() handles tlb flushes properly */
         if ( rc )
             goto out;
     }
@@ -1107,7 +1132,7 @@ void p2m_pt_init(struct p2m_domain *p2m)
     p2m->recalc = do_recalc;
     p2m->change_entry_type_global = p2m_pt_change_entry_type_global;
     p2m->change_entry_type_range = p2m_pt_change_entry_type_range;
-    p2m->write_p2m_entry = paging_write_p2m_entry;
+    p2m->write_p2m_entry = write_p2m_entry;
 #if P2M_AUDIT
     p2m->audit_p2m = p2m_pt_audit_p2m;
 #else
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -941,27 +941,7 @@ void paging_update_nestedmode(struct vcp
         v->arch.paging.nestedmode = NULL;
     hvm_asid_flush_vcpu(v);
 }
-#endif
 
-int paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
-                           l1_pgentry_t *p, l1_pgentry_t new,
-                           unsigned int level)
-{
-    struct domain *d = p2m->domain;
-    struct vcpu *v = current;
-    int rc = 0;
-
-    if ( v->domain != d )
-        v = d->vcpu ? d->vcpu[0] : NULL;
-    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v) != NULL) )
-        rc = paging_get_hostmode(v)->write_p2m_entry(p2m, gfn, p, new, level);
-    else
-        safe_write_pte(p, new);
-
-    return rc;
-}
-
-#ifdef CONFIG_HVM
 int __init paging_set_allocation(struct domain *d, unsigned int pages,
                                  bool *preempted)
 {
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -369,18 +369,6 @@ static inline void safe_write_pte(l1_pge
     *p = new;
 }
 
-/* Atomically write a P2M entry and update the paging-assistance state 
- * appropriately. 
- * Arguments: the domain in question, the GFN whose mapping is being updated, 
- * a pointer to the entry to be written, the MFN in which the entry resides, 
- * the new contents of the entry, and the level in the p2m tree at which 
- * we are writing. */
-struct p2m_domain;
-
-int paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
-                           l1_pgentry_t *p, l1_pgentry_t new,
-                           unsigned int level);
-
 /*
  * Called from the guest to indicate that the a process is being
  * torn down and its pagetables will soon be discarded.


[PATCH v2 2/9] x86/p2m: collapse the two ->write_p2m_entry() hooks

Posted by Jan Beulich 3 weeks, 1 day ago
The struct paging_mode instances get set to the same functions
regardless of mode by both HAP and shadow code, hence there's no point
having this hook there. The hook also doesn't need moving elsewhere - we
can directly use struct p2m_domain's. This merely requires (from a
strictly formal pov; in practice this may not even be needed) making
sure we don't end up using safe_write_pte() for nested P2Ms.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
---
Like for the possibly unnecessary p2m_is_nestedp2m() I'm not really sure
the paging_get_hostmode() check there is still needed either. But I
didn't want to alter more aspects than necessary here.

Of course with the p2m_is_nestedp2m() check there and with all three of
{hap,nestedp2m,shadow}_write_p2m_entry() now globally accessible, it's
certainly an option to do away with the indirect call there altogether.
In fact we may even be able to go further and fold the three functions:
They're relatively similar, and this would "seamlessly" address the
apparent bug of nestedp2m_write_p2m_entry() not making use of
p2m_entry_modify().

--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -823,6 +823,11 @@ hap_write_p2m_entry(struct p2m_domain *p
     return 0;
 }
 
+void hap_p2m_init(struct p2m_domain *p2m)
+{
+    p2m->write_p2m_entry = hap_write_p2m_entry;
+}
+
 static unsigned long hap_gva_to_gfn_real_mode(
     struct vcpu *v, struct p2m_domain *p2m, unsigned long gva, uint32_t *pfec)
 {
@@ -846,7 +851,6 @@ static const struct paging_mode hap_pagi
     .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_real_mode,
     .update_cr3             = hap_update_cr3,
     .update_paging_modes    = hap_update_paging_modes,
-    .write_p2m_entry        = hap_write_p2m_entry,
     .flush_tlb              = flush_tlb,
     .guest_levels           = 1
 };
@@ -858,7 +862,6 @@ static const struct paging_mode hap_pagi
     .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_2_levels,
     .update_cr3             = hap_update_cr3,
     .update_paging_modes    = hap_update_paging_modes,
-    .write_p2m_entry        = hap_write_p2m_entry,
     .flush_tlb              = flush_tlb,
     .guest_levels           = 2
 };
@@ -870,7 +873,6 @@ static const struct paging_mode hap_pagi
     .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_3_levels,
     .update_cr3             = hap_update_cr3,
     .update_paging_modes    = hap_update_paging_modes,
-    .write_p2m_entry        = hap_write_p2m_entry,
     .flush_tlb              = flush_tlb,
     .guest_levels           = 3
 };
@@ -882,7 +884,6 @@ static const struct paging_mode hap_pagi
     .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_4_levels,
     .update_cr3             = hap_update_cr3,
     .update_paging_modes    = hap_update_paging_modes,
-    .write_p2m_entry        = hap_write_p2m_entry,
     .flush_tlb              = flush_tlb,
     .guest_levels           = 4
 };
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -126,8 +126,9 @@ static int write_p2m_entry(struct p2m_do
 
     if ( v->domain != d )
         v = d->vcpu ? d->vcpu[0] : NULL;
-    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v)) )
-        rc = paging_get_hostmode(v)->write_p2m_entry(p2m, gfn, p, new, level);
+    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v)) ||
+         p2m_is_nestedp2m(p2m) )
+        rc = p2m->write_p2m_entry(p2m, gfn, p, new, level);
     else
         safe_write_pte(p, new);
 
@@ -209,7 +210,7 @@ p2m_next_level(struct p2m_domain *p2m, v
 
         new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
 
-        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
+        rc = write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
         if ( rc )
             goto error;
     }
@@ -251,7 +252,7 @@ p2m_next_level(struct p2m_domain *p2m, v
         {
             new_entry = l1e_from_pfn(pfn | (i << ((level - 1) * PAGETABLE_ORDER)),
                                      flags);
-            rc = p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
+            rc = write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
             if ( rc )
             {
                 unmap_domain_page(l1_entry);
@@ -262,8 +263,7 @@ p2m_next_level(struct p2m_domain *p2m, v
         unmap_domain_page(l1_entry);
 
         new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
-        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry,
-                                  level + 1);
+        rc = write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
         if ( rc )
             goto error;
     }
@@ -335,7 +335,7 @@ static int p2m_pt_set_recalc_range(struc
             if ( (l1e_get_flags(e) & _PAGE_PRESENT) && !needs_recalc(l1, e) )
             {
                 set_recalc(l1, e);
-                err = p2m->write_p2m_entry(p2m, first_gfn, pent, e, level);
+                err = write_p2m_entry(p2m, first_gfn, pent, e, level);
                 if ( err )
                 {
                     ASSERT_UNREACHABLE();
@@ -412,8 +412,8 @@ static int do_recalc(struct p2m_domain *
                      !needs_recalc(l1, ent) )
                 {
                     set_recalc(l1, ent);
-                    err = p2m->write_p2m_entry(p2m, gfn - remainder, &ptab[i],
-                                               ent, level);
+                    err = write_p2m_entry(p2m, gfn - remainder, &ptab[i], ent,
+                                          level);
                     if ( err )
                     {
                         ASSERT_UNREACHABLE();
@@ -426,7 +426,7 @@ static int do_recalc(struct p2m_domain *
             if ( !err )
             {
                 clear_recalc(l1, e);
-                err = p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
+                err = write_p2m_entry(p2m, gfn, pent, e, level + 1);
                 ASSERT(!err);
 
                 recalc_done = true;
@@ -474,7 +474,7 @@ static int do_recalc(struct p2m_domain *
         }
         else
             clear_recalc(l1, e);
-        err = p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
+        err = write_p2m_entry(p2m, gfn, pent, e, level + 1);
         ASSERT(!err);
 
         recalc_done = true;
@@ -618,7 +618,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
             : l3e_empty();
         entry_content.l1 = l3e_content.l3;
 
-        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3);
+        rc = write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3);
         /* NB: write_p2m_entry() handles tlb flushes properly */
         if ( rc )
             goto out;
@@ -655,7 +655,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
             entry_content = l1e_empty();
 
         /* level 1 entry */
-        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1);
+        rc = write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1);
         /* NB: write_p2m_entry() handles tlb flushes properly */
         if ( rc )
             goto out;
@@ -690,7 +690,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
             : l2e_empty();
         entry_content.l1 = l2e_content.l2;
 
-        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2);
+        rc = write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2);
         /* NB: write_p2m_entry() handles tlb flushes properly */
         if ( rc )
             goto out;
@@ -914,7 +914,7 @@ static void p2m_pt_change_entry_type_glo
             int rc;
 
             set_recalc(l1, e);
-            rc = p2m->write_p2m_entry(p2m, gfn, &tab[i], e, 4);
+            rc = write_p2m_entry(p2m, gfn, &tab[i], e, 4);
             if ( rc )
             {
                 ASSERT_UNREACHABLE();
@@ -1132,7 +1132,13 @@ void p2m_pt_init(struct p2m_domain *p2m)
     p2m->recalc = do_recalc;
     p2m->change_entry_type_global = p2m_pt_change_entry_type_global;
     p2m->change_entry_type_range = p2m_pt_change_entry_type_range;
-    p2m->write_p2m_entry = write_p2m_entry;
+
+    /* Still too early to use paging_mode_hap(). */
+    if ( hap_enabled(p2m->domain) )
+        hap_p2m_init(p2m);
+    else if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
+        shadow_p2m_init(p2m);
+
 #if P2M_AUDIT
     p2m->audit_p2m = p2m_pt_audit_p2m;
 #else
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3144,7 +3144,7 @@ static void sh_unshadow_for_p2m_change(s
     }
 }
 
-int
+static int
 shadow_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                        l1_pgentry_t *p, l1_pgentry_t new,
                        unsigned int level)
@@ -3190,6 +3190,11 @@ shadow_write_p2m_entry(struct p2m_domain
     return 0;
 }
 
+void shadow_p2m_init(struct p2m_domain *p2m)
+{
+    p2m->write_p2m_entry = shadow_write_p2m_entry;
+}
+
 /**************************************************************************/
 /* Log-dirty mode support */
 
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4574,7 +4574,6 @@ const struct paging_mode sh_paging_mode
     .gva_to_gfn                    = sh_gva_to_gfn,
     .update_cr3                    = sh_update_cr3,
     .update_paging_modes           = shadow_update_paging_modes,
-    .write_p2m_entry               = shadow_write_p2m_entry,
     .flush_tlb                     = shadow_flush_tlb,
     .guest_levels                  = GUEST_PAGING_LEVELS,
     .shadow.detach_old_tables      = sh_detach_old_tables,
--- a/xen/arch/x86/mm/shadow/none.c
+++ b/xen/arch/x86/mm/shadow/none.c
@@ -60,21 +60,12 @@ static void _update_paging_modes(struct
     ASSERT_UNREACHABLE();
 }
 
-static int _write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
-                            l1_pgentry_t *p, l1_pgentry_t new,
-                            unsigned int level)
-{
-    ASSERT_UNREACHABLE();
-    return -EOPNOTSUPP;
-}
-
 static const struct paging_mode sh_paging_none = {
     .page_fault                    = _page_fault,
     .invlpg                        = _invlpg,
     .gva_to_gfn                    = _gva_to_gfn,
     .update_cr3                    = _update_cr3,
     .update_paging_modes           = _update_paging_modes,
-    .write_p2m_entry               = _write_p2m_entry,
 };
 
 void shadow_vcpu_init(struct vcpu *v)
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -387,11 +387,6 @@ static inline int sh_remove_write_access
 }
 #endif
 
-/* Functions that atomically write PT/P2M entries and update state */
-int shadow_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
-                           l1_pgentry_t *p, l1_pgentry_t new,
-                           unsigned int level);
-
 /* Functions that atomically write PV guest PT entries */
 void sh_write_guest_entry(struct vcpu *v, intpte_t *p, intpte_t new,
                           mfn_t gmfn);
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -836,6 +836,9 @@ void p2m_flush_nestedp2m(struct domain *
 /* Flushes the np2m specified by np2m_base (if it exists) */
 void np2m_flush_base(struct vcpu *v, unsigned long np2m_base);
 
+void hap_p2m_init(struct p2m_domain *p2m);
+void shadow_p2m_init(struct p2m_domain *p2m);
+
 int nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
     l1_pgentry_t *p, l1_pgentry_t new, unsigned int level);
 
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -139,10 +139,6 @@ struct paging_mode {
     void          (*update_cr3            )(struct vcpu *v, int do_locking,
                                             bool noflush);
     void          (*update_paging_modes   )(struct vcpu *v);
-    int           (*write_p2m_entry       )(struct p2m_domain *p2m,
-                                            unsigned long gfn,
-                                            l1_pgentry_t *p, l1_pgentry_t new,
-                                            unsigned int level);
     bool          (*flush_tlb             )(bool (*flush_vcpu)(void *ctxt,
                                                                struct vcpu *v),
                                             void *ctxt);


[PATCH v2 3/9] x86/p2m: suppress audit_p2m hook when possible

Posted by Jan Beulich 3 weeks, 1 day ago
When P2M_AUDIT is false, it's unused, so instead of having a dangling
NULL pointer sit there, omit the field altogether.

Instead of adding "#if P2M_AUDIT && defined(CONFIG_HVM)" in even more
places, fold the latter part right into the definition of P2M_AUDIT.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Fix build with newer clang ("defined" may not be the result of a
    macro expansion).
---
I wonder if !NDEBUG wouldn't better be replaced by CONFIG_DEBUG.

--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1012,7 +1012,7 @@ long arch_do_domctl(
         break;
 #endif
 
-#if P2M_AUDIT && defined(CONFIG_HVM)
+#if P2M_AUDIT
     case XEN_DOMCTL_audit_p2m:
         if ( d == currd )
             ret = -EPERM;
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -1260,7 +1260,9 @@ int ept_p2m_init(struct p2m_domain *p2m)
     p2m->change_entry_type_global = ept_change_entry_type_global;
     p2m->change_entry_type_range = ept_change_entry_type_range;
     p2m->memory_type_changed = ept_memory_type_changed;
+#if P2M_AUDIT
     p2m->audit_p2m = NULL;
+#endif
     p2m->tlb_flush = ept_tlb_flush;
 
     /* Set the memory type used when accessing EPT paging structures. */
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -971,8 +971,8 @@ static int p2m_pt_change_entry_type_rang
     return err;
 }
 
-#if P2M_AUDIT && defined(CONFIG_HVM)
-long p2m_pt_audit_p2m(struct p2m_domain *p2m)
+#if P2M_AUDIT
+static long p2m_pt_audit_p2m(struct p2m_domain *p2m)
 {
     unsigned long entry_count = 0, pmbad = 0;
     unsigned long mfn, gfn, m2pfn;
@@ -1120,8 +1120,6 @@ long p2m_pt_audit_p2m(struct p2m_domain
 
     return pmbad;
 }
-#else
-# define p2m_pt_audit_p2m NULL
 #endif /* P2M_AUDIT */
 
 /* Set up the p2m function pointers for pagetable format */
@@ -1141,8 +1139,6 @@ void p2m_pt_init(struct p2m_domain *p2m)
 
 #if P2M_AUDIT
     p2m->audit_p2m = p2m_pt_audit_p2m;
-#else
-    p2m->audit_p2m = NULL;
 #endif
 }
 
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2435,7 +2435,7 @@ int p2m_altp2m_propagate_change(struct d
 
 /*** Audit ***/
 
-#if P2M_AUDIT && defined(CONFIG_HVM)
+#if P2M_AUDIT
 void audit_p2m(struct domain *d,
                uint64_t *orphans,
                 uint64_t *m2p_bad,
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -31,6 +31,14 @@
 #include <asm/mem_sharing.h>
 #include <asm/page.h>    /* for pagetable_t */
 
+/* Debugging and auditing of the P2M code? */
+#if !defined(NDEBUG) && defined(CONFIG_HVM)
+#define P2M_AUDIT     1
+#else
+#define P2M_AUDIT     0
+#endif
+#define P2M_DEBUGGING 0
+
 extern bool_t opt_hap_1gb, opt_hap_2mb;
 
 /*
@@ -268,7 +276,9 @@ struct p2m_domain {
     int                (*write_p2m_entry)(struct p2m_domain *p2m,
                                           unsigned long gfn, l1_pgentry_t *p,
                                           l1_pgentry_t new, unsigned int level);
+#if P2M_AUDIT
     long               (*audit_p2m)(struct p2m_domain *p2m);
+#endif
 
     /*
      * P2M updates may require TLBs to be flushed (invalidated).
@@ -758,14 +768,6 @@ extern void p2m_pt_init(struct p2m_domai
 void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn,
                      p2m_query_t q, uint32_t *pfec);
 
-/* Debugging and auditing of the P2M code? */
-#ifndef NDEBUG
-#define P2M_AUDIT     1
-#else
-#define P2M_AUDIT     0
-#endif
-#define P2M_DEBUGGING 0
-
 #if P2M_AUDIT
 extern void audit_p2m(struct domain *d,
                       uint64_t *orphans,


[PATCH v2 4/9] x86/HAP: move nested-P2M flush calculations out of locked region

Posted by Jan Beulich 3 weeks, 1 day ago
By latching the old MFN into a local variable, these calculations don't
depend on anything but local variables anymore. Hence the point in time
when they get performed doesn't matter anymore, so they can be moved
past the locked region.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -780,7 +780,7 @@ hap_write_p2m_entry(struct p2m_domain *p
 {
     struct domain *d = p2m->domain;
     uint32_t old_flags;
-    bool_t flush_nestedp2m = 0;
+    mfn_t omfn;
     int rc;
 
     /* We know always use the host p2m here, regardless if the vcpu
@@ -790,21 +790,11 @@ hap_write_p2m_entry(struct p2m_domain *p
 
     paging_lock(d);
     old_flags = l1e_get_flags(*p);
-
-    if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT) 
-         && !p2m_get_hostp2m(d)->defer_nested_flush ) {
-        /* We are replacing a valid entry so we need to flush nested p2ms,
-         * unless the only change is an increase in access rights. */
-        mfn_t omfn = l1e_get_mfn(*p);
-        mfn_t nmfn = l1e_get_mfn(new);
-
-        flush_nestedp2m = !(mfn_eq(omfn, nmfn)
-            && perms_strictly_increased(old_flags, l1e_get_flags(new)) );
-    }
+    omfn = l1e_get_mfn(*p);
 
     rc = p2m_entry_modify(p2m, p2m_flags_to_type(l1e_get_flags(new)),
                           p2m_flags_to_type(old_flags), l1e_get_mfn(new),
-                          l1e_get_mfn(*p), level);
+                          omfn, level);
     if ( rc )
     {
         paging_unlock(d);
@@ -817,7 +807,14 @@ hap_write_p2m_entry(struct p2m_domain *p
 
     paging_unlock(d);
 
-    if ( flush_nestedp2m )
+    if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT) &&
+         !p2m_get_hostp2m(d)->defer_nested_flush &&
+         /*
+          * We are replacing a valid entry so we need to flush nested p2ms,
+          * unless the only change is an increase in access rights.
+          */
+         (!mfn_eq(omfn, l1e_get_mfn(new)) ||
+          !perms_strictly_increased(old_flags, l1e_get_flags(new))) )
         p2m_flush_nestedp2m(d);
 
     return 0;


[PATCH v2 5/9] x86/p2m: split write_p2m_entry() hook

Posted by Jan Beulich 3 weeks, 1 day ago
Fair parts of the present handlers are identical; in fact
nestedp2m_write_p2m_entry() lacks a call to p2m_entry_modify(). Move
common parts right into write_p2m_entry(), splitting the hooks into a
"pre" one (needed just by shadow code) and a "post" one.

For the common parts moved I think that the p2m_flush_nestedp2m() is,
at least from an abstract perspective, also applicable in the shadow
case. Hence it doesn't get a 3rd hook put in place.

The initial comment that was in hap_write_p2m_entry() gets dropped: Its
placement was bogus, and looking back the the commit introducing it
(dd6de3ab9985 "Implement Nested-on-Nested") I can't see either what use
of a p2m it was meant to be associated with.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
---
RFC: This is effectively the alternative to the suggestion in an earlier
     patch that we might do away with the hook altogether. Of course a
     hybrid approach would also be possible, by using direct calls here
     instead of splitting the hook into two.
---
I'm unsure whether p2m_init_nestedp2m() zapping the "pre" hook is
actually correct, or whether previously the sh_unshadow_for_p2m_change()
invocation was wrongly skipped.

--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -774,55 +774,18 @@ static void hap_update_paging_modes(stru
     put_gfn(d, cr3_gfn);
 }
 
-static int
-hap_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, l1_pgentry_t *p,
-                    l1_pgentry_t new, unsigned int level)
+static void
+hap_write_p2m_entry_post(struct p2m_domain *p2m, unsigned int oflags)
 {
     struct domain *d = p2m->domain;
-    uint32_t old_flags;
-    mfn_t omfn;
-    int rc;
 
-    /* We know always use the host p2m here, regardless if the vcpu
-     * is in host or guest mode. The vcpu can be in guest mode by
-     * a hypercall which passes a domain and chooses mostly the first
-     * vcpu. */
-
-    paging_lock(d);
-    old_flags = l1e_get_flags(*p);
-    omfn = l1e_get_mfn(*p);
-
-    rc = p2m_entry_modify(p2m, p2m_flags_to_type(l1e_get_flags(new)),
-                          p2m_flags_to_type(old_flags), l1e_get_mfn(new),
-                          omfn, level);
-    if ( rc )
-    {
-        paging_unlock(d);
-        return rc;
-    }
-
-    safe_write_pte(p, new);
-    if ( old_flags & _PAGE_PRESENT )
+    if ( oflags & _PAGE_PRESENT )
         guest_flush_tlb_mask(d, d->dirty_cpumask);
-
-    paging_unlock(d);
-
-    if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT) &&
-         !p2m_get_hostp2m(d)->defer_nested_flush &&
-         /*
-          * We are replacing a valid entry so we need to flush nested p2ms,
-          * unless the only change is an increase in access rights.
-          */
-         (!mfn_eq(omfn, l1e_get_mfn(new)) ||
-          !perms_strictly_increased(old_flags, l1e_get_flags(new))) )
-        p2m_flush_nestedp2m(d);
-
-    return 0;
 }
 
 void hap_p2m_init(struct p2m_domain *p2m)
 {
-    p2m->write_p2m_entry = hap_write_p2m_entry;
+    p2m->write_p2m_entry_post = hap_write_p2m_entry_post;
 }
 
 static unsigned long hap_gva_to_gfn_real_mode(
--- a/xen/arch/x86/mm/hap/nested_hap.c
+++ b/xen/arch/x86/mm/hap/nested_hap.c
@@ -71,24 +71,11 @@
 /*        NESTED VIRT P2M FUNCTIONS         */
 /********************************************/
 
-int
-nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
-    l1_pgentry_t *p, l1_pgentry_t new, unsigned int level)
+void
+nestedp2m_write_p2m_entry_post(struct p2m_domain *p2m, unsigned int oflags)
 {
-    struct domain *d = p2m->domain;
-    uint32_t old_flags;
-
-    paging_lock(d);
-
-    old_flags = l1e_get_flags(*p);
-    safe_write_pte(p, new);
-
-    if (old_flags & _PAGE_PRESENT)
-        guest_flush_tlb_mask(d, p2m->dirty_cpumask);
-
-    paging_unlock(d);
-
-    return 0;
+    if ( oflags & _PAGE_PRESENT )
+        guest_flush_tlb_mask(p2m->domain, p2m->dirty_cpumask);
 }
 
 /********************************************/
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -122,17 +122,55 @@ static int write_p2m_entry(struct p2m_do
 {
     struct domain *d = p2m->domain;
     struct vcpu *v = current;
-    int rc = 0;
 
     if ( v->domain != d )
         v = d->vcpu ? d->vcpu[0] : NULL;
     if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v)) ||
          p2m_is_nestedp2m(p2m) )
-        rc = p2m->write_p2m_entry(p2m, gfn, p, new, level);
+    {
+        unsigned int oflags;
+        mfn_t omfn;
+        int rc;
+
+        paging_lock(d);
+
+        if ( p2m->write_p2m_entry_pre )
+            p2m->write_p2m_entry_pre(d, gfn, p, new, level);
+
+        oflags = l1e_get_flags(*p);
+        omfn = l1e_get_mfn(*p);
+
+        rc = p2m_entry_modify(p2m, p2m_flags_to_type(l1e_get_flags(new)),
+                              p2m_flags_to_type(oflags), l1e_get_mfn(new),
+                              omfn, level);
+        if ( rc )
+        {
+            paging_unlock(d);
+            return rc;
+        }
+
+        safe_write_pte(p, new);
+
+        if ( p2m->write_p2m_entry_post )
+            p2m->write_p2m_entry_post(p2m, oflags);
+
+        paging_unlock(d);
+
+        if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) &&
+             (oflags & _PAGE_PRESENT) &&
+             !p2m_get_hostp2m(d)->defer_nested_flush &&
+             /*
+              * We are replacing a valid entry so we need to flush nested p2ms,
+              * unless the only change is an increase in access rights.
+              */
+             (!mfn_eq(omfn, l1e_get_mfn(new)) ||
+              !perms_strictly_increased(oflags, l1e_get_flags(new))) )
+            p2m_flush_nestedp2m(d);
+    }
     else
         safe_write_pte(p, new);
 
-    return rc;
+    return 0;
 }
 
 // Find the next level's P2M entry, checking for out-of-range gfn's...
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -198,7 +198,8 @@ static int p2m_init_nestedp2m(struct dom
             return -ENOMEM;
         }
         p2m->p2m_class = p2m_nested;
-        p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
+        p2m->write_p2m_entry_pre = NULL;
+        p2m->write_p2m_entry_post = nestedp2m_write_p2m_entry_post;
         list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list);
     }
 
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3144,34 +3144,22 @@ static void sh_unshadow_for_p2m_change(s
     }
 }
 
-static int
-shadow_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
-                       l1_pgentry_t *p, l1_pgentry_t new,
-                       unsigned int level)
+static void
+sh_write_p2m_entry_pre(struct domain *d, unsigned long gfn, l1_pgentry_t *p,
+                       l1_pgentry_t new, unsigned int level)
 {
-    struct domain *d = p2m->domain;
-    int rc;
-
-    paging_lock(d);
-
     /* If there are any shadows, update them.  But if shadow_teardown()
      * has already been called then it's not safe to try. */
     if ( likely(d->arch.paging.shadow.total_pages != 0) )
          sh_unshadow_for_p2m_change(d, gfn, p, new, level);
-
-    rc = p2m_entry_modify(p2m, p2m_flags_to_type(l1e_get_flags(new)),
-                          p2m_flags_to_type(l1e_get_flags(*p)),
-                          l1e_get_mfn(new), l1e_get_mfn(*p), level);
-    if ( rc )
-    {
-        paging_unlock(d);
-        return rc;
-    }
-
-    /* Update the entry with new content */
-    safe_write_pte(p, new);
+}
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
+static void
+sh_write_p2m_entry_post(struct p2m_domain *p2m, unsigned int oflags)
+{
+    struct domain *d = p2m->domain;
+
     /* If we're doing FAST_FAULT_PATH, then shadow mode may have
        cached the fact that this is an mmio region in the shadow
        page tables.  Blow the tables away to remove the cache.
@@ -3183,16 +3171,15 @@ shadow_write_p2m_entry(struct p2m_domain
         shadow_blow_tables(d);
         d->arch.paging.shadow.has_fast_mmio_entries = false;
     }
-#endif
-
-    paging_unlock(d);
-
-    return 0;
 }
+#else
+# define sh_write_p2m_entry_post NULL
+#endif
 
 void shadow_p2m_init(struct p2m_domain *p2m)
 {
-    p2m->write_p2m_entry = shadow_write_p2m_entry;
+    p2m->write_p2m_entry_pre  = sh_write_p2m_entry_pre;
+    p2m->write_p2m_entry_post = sh_write_p2m_entry_post;
 }
 
 /**************************************************************************/
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -272,10 +272,13 @@ struct p2m_domain {
                                                   unsigned long first_gfn,
                                                   unsigned long last_gfn);
     void               (*memory_type_changed)(struct p2m_domain *p2m);
-    
-    int                (*write_p2m_entry)(struct p2m_domain *p2m,
-                                          unsigned long gfn, l1_pgentry_t *p,
-                                          l1_pgentry_t new, unsigned int level);
+    void               (*write_p2m_entry_pre)(struct domain *d,
+                                              unsigned long gfn,
+                                              l1_pgentry_t *p,
+                                              l1_pgentry_t new,
+                                              unsigned int level);
+    void               (*write_p2m_entry_post)(struct p2m_domain *p2m,
+                                               unsigned int oflags);
 #if P2M_AUDIT
     long               (*audit_p2m)(struct p2m_domain *p2m);
 #endif
@@ -472,7 +475,7 @@ void __put_gfn(struct p2m_domain *p2m, u
  *
  * This is also used in the shadow code whenever the paging lock is
  * held -- in those cases, the caller is protected against concurrent
- * p2m updates by the fact that shadow_write_p2m_entry() also takes
+ * p2m updates by the fact that write_p2m_entry() also takes
  * the paging lock.
  *
  * Note that an unlocked accessor only makes sense for a "query" lookup.
@@ -841,8 +844,8 @@ void np2m_flush_base(struct vcpu *v, uns
 void hap_p2m_init(struct p2m_domain *p2m);
 void shadow_p2m_init(struct p2m_domain *p2m);
 
-int nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
-    l1_pgentry_t *p, l1_pgentry_t new, unsigned int level);
+void nestedp2m_write_p2m_entry_post(struct p2m_domain *p2m,
+                                    unsigned int oflags);
 
 /*
  * Alternate p2m: shadow p2m tables used for alternate memory views


[PATCH v2 6/9] x86/p2m: avoid unnecessary calls of write_p2m_entry_pre() hook

Posted by Jan Beulich 3 weeks, 1 day ago
When shattering a large page, we first construct the new page table page
and only then hook it up. The "pre" hook in this case does nothing, for
the page starting out all blank. Avoid 512 calls into shadow code in
this case by passing in INVALID_GFN, indicating the page being updated
is (not yet) associated with any GFN. (The alternative to this change
would be to actually pass in a correct GFN, which can't be all the same
on every loop iteration.)

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.

--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -134,7 +134,7 @@ static int write_p2m_entry(struct p2m_do
 
         paging_lock(d);
 
-        if ( p2m->write_p2m_entry_pre )
+        if ( p2m->write_p2m_entry_pre && gfn != gfn_x(INVALID_GFN) )
             p2m->write_p2m_entry_pre(d, gfn, p, new, level);
 
         oflags = l1e_get_flags(*p);
@@ -290,7 +290,8 @@ p2m_next_level(struct p2m_domain *p2m, v
         {
             new_entry = l1e_from_pfn(pfn | (i << ((level - 1) * PAGETABLE_ORDER)),
                                      flags);
-            rc = write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
+            rc = write_p2m_entry(p2m, gfn_x(INVALID_GFN), l1_entry + i,
+                                 new_entry, level);
             if ( rc )
             {
                 unmap_domain_page(l1_entry);


Ping: [PATCH v2 6/9] x86/p2m: avoid unnecessary calls of write_p2m_entry_pre() hook

Posted by Jan Beulich 5 days, 9 hours ago
On 06.11.2020 10:37, Jan Beulich wrote:
> When shattering a large page, we first construct the new page table page
> and only then hook it up. The "pre" hook in this case does nothing, for
> the page starting out all blank. Avoid 512 calls into shadow code in
> this case by passing in INVALID_GFN, indicating the page being updated
> is (not yet) associated with any GFN. (The alternative to this change
> would be to actually pass in a correct GFN, which can't be all the same
> on every loop iteration.)
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: New.

Ping?

Thanks, Jan

> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -134,7 +134,7 @@ static int write_p2m_entry(struct p2m_do
>  
>          paging_lock(d);
>  
> -        if ( p2m->write_p2m_entry_pre )
> +        if ( p2m->write_p2m_entry_pre && gfn != gfn_x(INVALID_GFN) )
>              p2m->write_p2m_entry_pre(d, gfn, p, new, level);
>  
>          oflags = l1e_get_flags(*p);
> @@ -290,7 +290,8 @@ p2m_next_level(struct p2m_domain *p2m, v
>          {
>              new_entry = l1e_from_pfn(pfn | (i << ((level - 1) * PAGETABLE_ORDER)),
>                                       flags);
> -            rc = write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
> +            rc = write_p2m_entry(p2m, gfn_x(INVALID_GFN), l1_entry + i,
> +                                 new_entry, level);
>              if ( rc )
>              {
>                  unmap_domain_page(l1_entry);
> 
> 


[PATCH v2 7/9] x86/p2m: pass old PTE directly to write_p2m_entry_pre() hook

Posted by Jan Beulich 3 weeks, 1 day ago
In no case is a pointer to non-const needed. Since no pointer arithmetic
is done by the sole user of the hook, passing in the PTE itself is quite
fine.

While doing this adjustment also
- drop the intermediate sh_write_p2m_entry_pre():
  sh_unshadow_for_p2m_change() can itself be used as the hook function,
  moving the conditional into there,
- introduce a local variable holding the flags of the old entry.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.

--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -135,7 +135,7 @@ static int write_p2m_entry(struct p2m_do
         paging_lock(d);
 
         if ( p2m->write_p2m_entry_pre && gfn != gfn_x(INVALID_GFN) )
-            p2m->write_p2m_entry_pre(d, gfn, p, new, level);
+            p2m->write_p2m_entry_pre(d, gfn, *p, new, level);
 
         oflags = l1e_get_flags(*p);
         omfn = l1e_get_mfn(*p);
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3078,19 +3078,28 @@ static int shadow_test_disable(struct do
  */
 
 static void sh_unshadow_for_p2m_change(struct domain *d, unsigned long gfn,
-                                       l1_pgentry_t *p, l1_pgentry_t new,
+                                       l1_pgentry_t old, l1_pgentry_t new,
                                        unsigned int level)
 {
+    unsigned int oflags = l1e_get_flags(old);
+
+    /*
+     * If there are any shadows, update them.  But if shadow_teardown()
+     * has already been called then it's not safe to try.
+     */
+    if ( unlikely(!d->arch.paging.shadow.total_pages) )
+        return;
+
     /* The following assertion is to make sure we don't step on 1GB host
      * page support of HVM guest. */
-    ASSERT(!(level > 2 && (l1e_get_flags(*p) & _PAGE_PRESENT) &&
-             (l1e_get_flags(*p) & _PAGE_PSE)));
+    ASSERT(!(level > 2 && (oflags & _PAGE_PRESENT) && (oflags & _PAGE_PSE)));
 
     /* If we're removing an MFN from the p2m, remove it from the shadows too */
     if ( level == 1 )
     {
-        mfn_t mfn = l1e_get_mfn(*p);
-        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+        mfn_t mfn = l1e_get_mfn(old);
+        p2m_type_t p2mt = p2m_flags_to_type(oflags);
+
         if ( (p2m_is_valid(p2mt) || p2m_is_grant(p2mt)) && mfn_valid(mfn) )
         {
             sh_remove_all_shadows_and_parents(d, mfn);
@@ -3102,15 +3111,15 @@ static void sh_unshadow_for_p2m_change(s
     /* If we're removing a superpage mapping from the p2m, we need to check
      * all the pages covered by it.  If they're still there in the new
      * scheme, that's OK, but otherwise they must be unshadowed. */
-    if ( level == 2 && (l1e_get_flags(*p) & _PAGE_PRESENT) &&
-         (l1e_get_flags(*p) & _PAGE_PSE) )
+    if ( level == 2 && (oflags & _PAGE_PRESENT) && (oflags & _PAGE_PSE) )
     {
         unsigned int i;
         cpumask_t flushmask;
-        mfn_t omfn = l1e_get_mfn(*p);
+        mfn_t omfn = l1e_get_mfn(old);
         mfn_t nmfn = l1e_get_mfn(new);
         l1_pgentry_t *npte = NULL;
-        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+        p2m_type_t p2mt = p2m_flags_to_type(oflags);
+
         if ( p2m_is_valid(p2mt) && mfn_valid(omfn) )
         {
             cpumask_clear(&flushmask);
@@ -3144,16 +3153,6 @@ static void sh_unshadow_for_p2m_change(s
     }
 }
 
-static void
-sh_write_p2m_entry_pre(struct domain *d, unsigned long gfn, l1_pgentry_t *p,
-                       l1_pgentry_t new, unsigned int level)
-{
-    /* If there are any shadows, update them.  But if shadow_teardown()
-     * has already been called then it's not safe to try. */
-    if ( likely(d->arch.paging.shadow.total_pages != 0) )
-         sh_unshadow_for_p2m_change(d, gfn, p, new, level);
-}
-
 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
 static void
 sh_write_p2m_entry_post(struct p2m_domain *p2m, unsigned int oflags)
@@ -3178,7 +3177,7 @@ sh_write_p2m_entry_post(struct p2m_domai
 
 void shadow_p2m_init(struct p2m_domain *p2m)
 {
-    p2m->write_p2m_entry_pre  = sh_write_p2m_entry_pre;
+    p2m->write_p2m_entry_pre  = sh_unshadow_for_p2m_change;
     p2m->write_p2m_entry_post = sh_write_p2m_entry_post;
 }
 
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -274,7 +274,7 @@ struct p2m_domain {
     void               (*memory_type_changed)(struct p2m_domain *p2m);
     void               (*write_p2m_entry_pre)(struct domain *d,
                                               unsigned long gfn,
-                                              l1_pgentry_t *p,
+                                              l1_pgentry_t old,
                                               l1_pgentry_t new,
                                               unsigned int level);
     void               (*write_p2m_entry_post)(struct p2m_domain *p2m,


Re: [PATCH v2 7/9] x86/p2m: pass old PTE directly to write_p2m_entry_pre() hook

Posted by Tim Deegan 3 weeks ago
At 10:38 +0100 on 06 Nov (1604659085), Jan Beulich wrote:
> In no case is a pointer to non-const needed. Since no pointer arithmetic
> is done by the sole user of the hook, passing in the PTE itself is quite
> fine.
> 
> While doing this adjustment also
> - drop the intermediate sh_write_p2m_entry_pre():
>   sh_unshadow_for_p2m_change() can itself be used as the hook function,
>   moving the conditional into there,
> - introduce a local variable holding the flags of the old entry.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Tim Deegan <tim@xen.org>


[PATCH v2 8/9] x86/shadow: cosmetics to sh_unshadow_for_p2m_change()

Posted by Jan Beulich 3 weeks, 1 day ago
Besides the adjustments for style
- use switch(),
- widen scope of commonly used variables,
- narrow scope of other variables.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.

--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3081,7 +3081,9 @@ static void sh_unshadow_for_p2m_change(s
                                        l1_pgentry_t old, l1_pgentry_t new,
                                        unsigned int level)
 {
+    mfn_t omfn = l1e_get_mfn(old);
     unsigned int oflags = l1e_get_flags(old);
+    p2m_type_t p2mt = p2m_flags_to_type(oflags);
 
     /*
      * If there are any shadows, update them.  But if shadow_teardown()
@@ -3090,53 +3092,57 @@ static void sh_unshadow_for_p2m_change(s
     if ( unlikely(!d->arch.paging.shadow.total_pages) )
         return;
 
-    /* The following assertion is to make sure we don't step on 1GB host
-     * page support of HVM guest. */
-    ASSERT(!(level > 2 && (oflags & _PAGE_PRESENT) && (oflags & _PAGE_PSE)));
-
-    /* If we're removing an MFN from the p2m, remove it from the shadows too */
-    if ( level == 1 )
+    switch ( level )
     {
-        mfn_t mfn = l1e_get_mfn(old);
-        p2m_type_t p2mt = p2m_flags_to_type(oflags);
+    default:
+        /*
+         * The following assertion is to make sure we don't step on 1GB host
+         * page support of HVM guest.
+         */
+        ASSERT(!((oflags & _PAGE_PRESENT) && (oflags & _PAGE_PSE)));
+        break;
 
-        if ( (p2m_is_valid(p2mt) || p2m_is_grant(p2mt)) && mfn_valid(mfn) )
+    /* If we're removing an MFN from the p2m, remove it from the shadows too */
+    case 1:
+        if ( (p2m_is_valid(p2mt) || p2m_is_grant(p2mt)) && mfn_valid(omfn) )
         {
-            sh_remove_all_shadows_and_parents(d, mfn);
-            if ( sh_remove_all_mappings(d, mfn, _gfn(gfn)) )
+            sh_remove_all_shadows_and_parents(d, omfn);
+            if ( sh_remove_all_mappings(d, omfn, _gfn(gfn)) )
                 guest_flush_tlb_mask(d, d->dirty_cpumask);
         }
-    }
+        break;
 
-    /* If we're removing a superpage mapping from the p2m, we need to check
+    /*
+     * If we're removing a superpage mapping from the p2m, we need to check
      * all the pages covered by it.  If they're still there in the new
-     * scheme, that's OK, but otherwise they must be unshadowed. */
-    if ( level == 2 && (oflags & _PAGE_PRESENT) && (oflags & _PAGE_PSE) )
-    {
-        unsigned int i;
-        cpumask_t flushmask;
-        mfn_t omfn = l1e_get_mfn(old);
-        mfn_t nmfn = l1e_get_mfn(new);
-        l1_pgentry_t *npte = NULL;
-        p2m_type_t p2mt = p2m_flags_to_type(oflags);
+     * scheme, that's OK, but otherwise they must be unshadowed.
+     */
+    case 2:
+        if ( !(oflags & _PAGE_PRESENT) || !(oflags & _PAGE_PSE) )
+            break;
 
         if ( p2m_is_valid(p2mt) && mfn_valid(omfn) )
         {
+            unsigned int i;
+            cpumask_t flushmask;
+            mfn_t nmfn = l1e_get_mfn(new);
+            l1_pgentry_t *npte = NULL;
+
             cpumask_clear(&flushmask);
 
             /* If we're replacing a superpage with a normal L1 page, map it */
-            if ( (l1e_get_flags(new) & _PAGE_PRESENT)
-                 && !(l1e_get_flags(new) & _PAGE_PSE)
-                 && mfn_valid(nmfn) )
+            if ( (l1e_get_flags(new) & _PAGE_PRESENT) &&
+                 !(l1e_get_flags(new) & _PAGE_PSE) &&
+                 mfn_valid(nmfn) )
                 npte = map_domain_page(nmfn);
 
             gfn &= ~(L1_PAGETABLE_ENTRIES - 1);
 
             for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
             {
-                if ( !npte
-                     || !p2m_is_ram(p2m_flags_to_type(l1e_get_flags(npte[i])))
-                     || !mfn_eq(l1e_get_mfn(npte[i]), omfn) )
+                if ( !npte ||
+                     !p2m_is_ram(p2m_flags_to_type(l1e_get_flags(npte[i]))) ||
+                     !mfn_eq(l1e_get_mfn(npte[i]), omfn) )
                 {
                     /* This GFN->MFN mapping has gone away */
                     sh_remove_all_shadows_and_parents(d, omfn);
@@ -3150,6 +3156,8 @@ static void sh_unshadow_for_p2m_change(s
             if ( npte )
                 unmap_domain_page(npte);
         }
+
+        break;
     }
 }
 


Re: [PATCH v2 8/9] x86/shadow: cosmetics to sh_unshadow_for_p2m_change()

Posted by Tim Deegan 3 weeks ago
At 10:38 +0100 on 06 Nov (1604659127), Jan Beulich wrote:
> Besides the adjustments for style
> - use switch(),
> - widen scope of commonly used variables,
> - narrow scope of other variables.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Tim Deegan <tim@xen.org>

[PATCH v2 9/9] x86/shadow: adjust TLB flushing in sh_unshadow_for_p2m_change()

Posted by Jan Beulich 3 weeks, 1 day ago
Accumulating transient state of d->dirty_cpumask in a local variable is
unnecessary here: The flush is fine to make with the dirty set at the
time of the call. With this, move the invocation to a central place at
the end of the function.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.

--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3084,6 +3084,7 @@ static void sh_unshadow_for_p2m_change(s
     mfn_t omfn = l1e_get_mfn(old);
     unsigned int oflags = l1e_get_flags(old);
     p2m_type_t p2mt = p2m_flags_to_type(oflags);
+    bool flush = false;
 
     /*
      * If there are any shadows, update them.  But if shadow_teardown()
@@ -3108,7 +3109,7 @@ static void sh_unshadow_for_p2m_change(s
         {
             sh_remove_all_shadows_and_parents(d, omfn);
             if ( sh_remove_all_mappings(d, omfn, _gfn(gfn)) )
-                guest_flush_tlb_mask(d, d->dirty_cpumask);
+                flush = true;
         }
         break;
 
@@ -3124,12 +3125,9 @@ static void sh_unshadow_for_p2m_change(s
         if ( p2m_is_valid(p2mt) && mfn_valid(omfn) )
         {
             unsigned int i;
-            cpumask_t flushmask;
             mfn_t nmfn = l1e_get_mfn(new);
             l1_pgentry_t *npte = NULL;
 
-            cpumask_clear(&flushmask);
-
             /* If we're replacing a superpage with a normal L1 page, map it */
             if ( (l1e_get_flags(new) & _PAGE_PRESENT) &&
                  !(l1e_get_flags(new) & _PAGE_PSE) &&
@@ -3147,11 +3145,10 @@ static void sh_unshadow_for_p2m_change(s
                     /* This GFN->MFN mapping has gone away */
                     sh_remove_all_shadows_and_parents(d, omfn);
                     if ( sh_remove_all_mappings(d, omfn, _gfn(gfn + i)) )
-                        cpumask_or(&flushmask, &flushmask, d->dirty_cpumask);
+                        flush = true;
                 }
                 omfn = mfn_add(omfn, 1);
             }
-            guest_flush_tlb_mask(d, &flushmask);
 
             if ( npte )
                 unmap_domain_page(npte);
@@ -3159,6 +3156,9 @@ static void sh_unshadow_for_p2m_change(s
 
         break;
     }
+
+    if ( flush )
+        guest_flush_tlb_mask(d, d->dirty_cpumask);
 }
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)


Re: [PATCH v2 9/9] x86/shadow: adjust TLB flushing in sh_unshadow_for_p2m_change()

Posted by Tim Deegan 3 weeks ago
At 10:39 +0100 on 06 Nov (1604659168), Jan Beulich wrote:
> Accumulating transient state of d->dirty_cpumask in a local variable is
> unnecessary here: The flush is fine to make with the dirty set at the
> time of the call. With this, move the invocation to a central place at
> the end of the function.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Tim Deegan <tim@xen.org>