[PATCH v2 0/5] x86: M2P maintenance adjustments (step 1)

Posted by Jan Beulich 3 years, 7 months ago
As pointed out by Andrew, maintaining the compat M2P when !PV32
(or when "pv=no-32" was given) is a pointless waste of memory. Do
away with this. A possible follow-up is to use the compat instance
instead of the native one in shim mode with a 32-bit guest, which
would require more intrusive rework, in particular delaying the
setup of the M2P until the bitness of the client domain is known.
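
For scale: the native M2P holds 8-byte entries and the compat one
4-byte entries, so the table kept alive for no consumer is sizeable.
A standalone back-of-the-envelope sketch (not part of the series; it
ignores the 1 GiB cap on the compat window, and the numbers are
illustrative only):

#include <stdio.h>

int main(void)
{
    unsigned long host_bytes = 64UL << 30;    /* e.g. a 64 GiB host */
    unsigned long nr_mfns = host_bytes >> 12; /* 4 KiB frames */

    /* Entry sizes match the Xen tables: unsigned long vs unsigned int. */
    printf("native M2P: %lu MiB\n",
           (unsigned long)(nr_mfns * sizeof(unsigned long)) >> 20);
    printf("compat M2P: %lu MiB\n",
           (unsigned long)(nr_mfns * sizeof(unsigned int)) >> 20);
    return 0;
}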

1: convert set_gpfn_from_mfn() to a function
2: don't maintain compat M2P when !PV32
3: don't override INVALID_M2P_ENTRY with SHARED_M2P_ENTRY
4: PV: also check kernel endianness when building Dom0
5: simplify is_guest_l2_slot()

Jan

[PATCH v2 1/5] x86: convert set_gpfn_from_mfn() to a function
Posted by Jan Beulich 3 years, 7 months ago
It is already a little too heavy for a macro, and more logic is about to
get added to it.

This also allows reducing the scope of compat_machine_to_phys_mapping.

Requested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.
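
(Aside, not part of the patch: the "/ 4" in the new function's bounds
check is sizeof(unsigned int), i.e. the compat table only mirrors MFNs
whose 4-byte entry fits inside the fixed virtual-address window. A
standalone model of just that check, with a demo window size matching
the 1 GiB x86 layout; in Xen the bounds come from
RDWR_COMPAT_MPT_VIRT_{START,END}:)

#include <stdbool.h>
#include <stdio.h>

#define DEMO_COMPAT_MPT_SIZE (1UL << 30)   /* 1 GiB window */

static bool covered_by_compat_m2p(unsigned long mfn)
{
    return mfn < DEMO_COMPAT_MPT_SIZE / sizeof(unsigned int);
}

int main(void)
{
    printf("%d %d\n",
           covered_by_compat_m2p(1UL << 27),   /* inside:  1 */
           covered_by_compat_m2p(1UL << 28));  /* outside: 0 */
    return 0;
}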

--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -40,6 +40,8 @@ EMIT_FILE;
 #include <asm/mem_sharing.h>
 #include <public/memory.h>
 
+#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START)
+
 unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
 
 l2_pgentry_t *compat_idle_pg_table_l2;
@@ -1454,6 +1456,20 @@ destroy_frametable:
     return ret;
 }
 
+void set_gpfn_from_mfn(unsigned long mfn, unsigned long pfn)
+{
+    const struct domain *d = page_get_owner(mfn_to_page(_mfn(mfn)));
+    unsigned long entry = (d && (d == dom_cow)) ? SHARED_M2P_ENTRY : pfn;
+
+    if ( unlikely(!machine_to_phys_mapping_valid) )
+        return;
+
+    if ( mfn < (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 )
+        compat_machine_to_phys_mapping[mfn] = entry;
+
+    machine_to_phys_mapping[mfn] = entry;
+}
+
 #include "compat/mm.c"
 
 /*
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -494,25 +494,13 @@ extern paddr_t mem_hotplug;
 #define SHARED_M2P_ENTRY         (~0UL - 1UL)
 #define SHARED_M2P(_e)           ((_e) == SHARED_M2P_ENTRY)
 
-#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START)
-#define _set_gpfn_from_mfn(mfn, pfn) ({                        \
-    struct domain *d = page_get_owner(mfn_to_page(_mfn(mfn))); \
-    unsigned long entry = (d && (d == dom_cow)) ?              \
-        SHARED_M2P_ENTRY : (pfn);                              \
-    ((void)((mfn) >= (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 || \
-            (compat_machine_to_phys_mapping[(mfn)] = (unsigned int)(entry))), \
-     machine_to_phys_mapping[(mfn)] = (entry));                \
-    })
-
 /*
  * Disable some users of set_gpfn_from_mfn() (e.g., free_heap_pages()) until
  * the machine_to_phys_mapping is actually set up.
  */
 extern bool machine_to_phys_mapping_valid;
-#define set_gpfn_from_mfn(mfn, pfn) do {        \
-    if ( machine_to_phys_mapping_valid )        \
-        _set_gpfn_from_mfn(mfn, pfn);           \
-} while (0)
+
+void set_gpfn_from_mfn(unsigned long mfn, unsigned long pfn);
 
 extern struct rangeset *mmio_ro_ranges;
 


[PATCH v2 2/5] x86: don't maintain compat M2P when !PV32
Posted by Jan Beulich 3 years, 7 months ago
It's effectively unused in this case (as well as when "pv=no-32").

While touching their definitions anyway, also adjust section placement
of m2p_compat_vstart and compat_idle_pg_table_l2. Similarly, while
putting init_xen_pae_l2_slots() inside #ifdef, also move it to a PV-only
source file.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Drop stray changes to setup_compat_m2p_table(). Re-base over
    set_gpfn_from_mfn() conversion to function.
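
(Aside, not part of the patch: the gating here is two-level.
CONFIG_PV32 removes code at build time, while opt_pv32 reflects
"pv=no-32" at run time; when !CONFIG_PV32, opt_pv32 degenerates to a
constant so the compiler can discard the guarded code. A standalone
model of the pattern; the names mirror the Xen ones, but this is a
sketch only:)

#include <stdbool.h>
#include <stdio.h>

#define CONFIG_PV32 1          /* flip to 0 to model a !PV32 build */

#if CONFIG_PV32
static bool opt_pv32 = true;   /* cleared by "pv=no-32" in real Xen */
#else
# define opt_pv32 false        /* constant false; guarded code gets DCEd */
#endif

int main(void)
{
    if ( opt_pv32 )
        puts("compat M2P maintained");
    else
        puts("compat M2P skipped");
    return 0;
}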

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -764,8 +764,10 @@ int arch_domain_create(struct domain *d,
     }
     d->arch.emulation_flags = emflags;
 
+#ifdef CONFIG_PV32
     HYPERVISOR_COMPAT_VIRT_START(d) =
         is_pv_domain(d) ? __HYPERVISOR_COMPAT_VIRT_START : ~0u;
+#endif
 
     if ( (rc = paging_domain_init(d)) != 0 )
         goto fail;
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1427,13 +1427,6 @@ static bool pae_xen_mappings_check(const
     return true;
 }
 
-void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d)
-{
-    memcpy(&l2t[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
-           compat_idle_pg_table_l2,
-           COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2t));
-}
-
 static int promote_l2_table(struct page_info *page, unsigned long type)
 {
     struct domain *d = page_get_owner(page);
--- a/xen/arch/x86/pv/mm.c
+++ b/xen/arch/x86/pv/mm.c
@@ -128,6 +128,15 @@ bool pv_map_ldt_shadow_page(unsigned int
     return true;
 }
 
+#ifdef CONFIG_PV32
+void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d)
+{
+    memcpy(&l2t[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
+           compat_idle_pg_table_l2,
+           COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2t));
+}
+#endif
+
 /*
  * Local variables:
  * mode: C
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -103,6 +103,9 @@ int compat_arch_memory_op(unsigned long
             .max_mfn = MACH2PHYS_COMPAT_NR_ENTRIES(d) - 1
         };
 
+        if ( !opt_pv32 )
+            return -EOPNOTSUPP;
+
         if ( copy_to_guest(arg, &mapping, 1) )
             rc = -EFAULT;
 
@@ -115,6 +118,9 @@ int compat_arch_memory_op(unsigned long
         unsigned long limit;
         compat_pfn_t last_mfn;
 
+        if ( !opt_pv32 )
+            return -EOPNOTSUPP;
+
         if ( copy_from_guest(&xmml, arg, 1) )
             return -EFAULT;
 
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -34,17 +34,31 @@ EMIT_FILE;
 #include <asm/fixmap.h>
 #include <asm/hypercall.h>
 #include <asm/msr.h>
+#include <asm/pv/domain.h>
 #include <asm/setup.h>
 #include <asm/numa.h>
 #include <asm/mem_paging.h>
 #include <asm/mem_sharing.h>
 #include <public/memory.h>
 
+#ifdef CONFIG_PV32
+
 #define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START)
 
-unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
+unsigned int __initdata m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
+
+l2_pgentry_t *__read_mostly compat_idle_pg_table_l2;
+
+#else /* !CONFIG_PV32 */
+
+/*
+ * Declare the symbol such that (dead) code referencing it can be built
+ * without a lot of #ifdef-ary, but mark it fully const and don't define
+ * this symbol anywhere (relying on DCE by the compiler).
+ */
+extern const unsigned int *const compat_machine_to_phys_mapping;
 
-l2_pgentry_t *compat_idle_pg_table_l2;
+#endif /* CONFIG_PV32 */
 
 void *do_page_walk(struct vcpu *v, unsigned long addr)
 {
@@ -220,7 +234,8 @@ static void destroy_compat_m2p_mapping(s
 {
     unsigned long i, smap = info->spfn, emap = info->spfn;
 
-    if ( smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
+    if ( !opt_pv32 ||
+         smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
         return;
 
     if ( emap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
@@ -328,7 +343,8 @@ static int setup_compat_m2p_table(struct
      * Notice: For hot-added memory, only range below m2p_compat_vstart
      * will be filled up (assuming memory is discontinous when booting).
      */
-    if   ((smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2)) )
+    if ( !opt_pv32 ||
+         (smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2)) )
         return 0;
 
     if ( epfn > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
@@ -611,17 +627,24 @@ void __init paging_init(void)
 #undef MFN
 
     /* Create user-accessible L2 directory to map the MPT for compat guests. */
-    if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL )
-        goto nomem;
-    compat_idle_pg_table_l2 = l2_ro_mpt;
-    clear_page(l2_ro_mpt);
-    /* Allocate and map the compatibility mode machine-to-phys table. */
-    mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1));
-    if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START )
-        mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START;
-    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
-    if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END )
-        m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
+    if ( opt_pv32 )
+    {
+        if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL )
+            goto nomem;
+        compat_idle_pg_table_l2 = l2_ro_mpt;
+        clear_page(l2_ro_mpt);
+
+        /* Allocate and map the compatibility mode machine-to-phys table. */
+        mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1));
+        if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START )
+            mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START;
+        mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
+        if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END )
+            m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
+    }
+    else
+        mpt_size = 0;
+
 #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
 #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
              sizeof(*compat_machine_to_phys_mapping))
@@ -847,23 +870,24 @@ void __init subarch_init_memory(void)
                 mfn_to_page(_mfn(m2p_start_mfn + i)), SHARE_ro);
     }
 
-    for ( v  = RDWR_COMPAT_MPT_VIRT_START;
-          v != RDWR_COMPAT_MPT_VIRT_END;
-          v += 1 << L2_PAGETABLE_SHIFT )
-    {
-        l3e = l3e_from_l4e(idle_pg_table[l4_table_offset(v)],
-                           l3_table_offset(v));
-        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-            continue;
-        l2e = l2e_from_l3e(l3e, l2_table_offset(v));
-        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-            continue;
-        m2p_start_mfn = l2e_get_pfn(l2e);
+    if ( opt_pv32 )
+        for ( v  = RDWR_COMPAT_MPT_VIRT_START;
+              v != RDWR_COMPAT_MPT_VIRT_END;
+              v += 1 << L2_PAGETABLE_SHIFT )
+        {
+            l3e = l3e_from_l4e(idle_pg_table[l4_table_offset(v)],
+                               l3_table_offset(v));
+            if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+                continue;
+            l2e = l2e_from_l3e(l3e, l2_table_offset(v));
+            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+                continue;
+            m2p_start_mfn = l2e_get_pfn(l2e);
 
-        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
-            share_xen_page_with_privileged_guests(
-                mfn_to_page(_mfn(m2p_start_mfn + i)), SHARE_ro);
-    }
+            for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+                share_xen_page_with_privileged_guests(
+                    mfn_to_page(_mfn(m2p_start_mfn + i)), SHARE_ro);
+        }
 
     /* Mark all of direct map NX if hardware supports it. */
     if ( !cpu_has_nx )
@@ -935,6 +959,9 @@ long subarch_memory_op(unsigned long cmd
         break;
 
     case XENMEM_machphys_compat_mfn_list:
+        if ( !opt_pv32 )
+            return -EOPNOTSUPP;
+
         if ( copy_from_guest(&xmml, arg, 1) )
             return -EFAULT;
 
@@ -1464,7 +1491,8 @@ void set_gpfn_from_mfn(unsigned long mfn
     if ( unlikely(!machine_to_phys_mapping_valid) )
         return;
 
-    if ( mfn < (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 )
+    if ( opt_pv32 &&
+         mfn < (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 )
         compat_machine_to_phys_mapping[mfn] = entry;
 
     machine_to_phys_mapping[mfn] = entry;
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -142,7 +142,7 @@ extern unsigned char boot_edid_info[128]
  *  0xffff82c000000000 - 0xffff82cfffffffff [64GB,  2^36 bytes, PML4:261]
  *    vmap()/ioremap()/fixmap area.
  *  0xffff82d000000000 - 0xffff82d03fffffff [1GB,   2^30 bytes, PML4:261]
- *    Compatibility machine-to-phys translation table.
+ *    Compatibility machine-to-phys translation table (CONFIG_PV32).
  *  0xffff82d040000000 - 0xffff82d07fffffff [1GB,   2^30 bytes, PML4:261]
  *    Xen text, static data, bss.
 #ifndef CONFIG_BIGMEM
@@ -246,9 +246,18 @@ extern unsigned char boot_edid_info[128]
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_PV32
+
 /* This is not a fixed value, just a lower limit. */
 #define __HYPERVISOR_COMPAT_VIRT_START 0xF5800000
 #define HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart)
+
+#else /* !CONFIG_PV32 */
+
+#define HYPERVISOR_COMPAT_VIRT_START(d) ((void)(d), 0)
+
+#endif /* CONFIG_PV32 */
+
 #define MACH2PHYS_COMPAT_VIRT_START    HYPERVISOR_COMPAT_VIRT_START
 #define MACH2PHYS_COMPAT_VIRT_END      0xFFE00000
 #define MACH2PHYS_COMPAT_NR_ENTRIES(d) \
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -307,7 +307,9 @@ struct arch_domain
 {
     struct page_info *perdomain_l3_pg;
 
+#ifdef CONFIG_PV32
     unsigned int hv_compat_vstart;
+#endif
 
     /* Maximum physical-address bitwidth supported by this guest. */
     unsigned int physaddr_bitsize;
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -42,8 +42,12 @@
 #define _PGT_validated    PG_shift(6)
 #define PGT_validated     PG_mask(1, 6)
  /* PAE only: is this an L2 page directory containing Xen-private mappings? */
+#ifdef CONFIG_PV32
 #define _PGT_pae_xen_l2   PG_shift(7)
 #define PGT_pae_xen_l2    PG_mask(1, 7)
+#else
+#define PGT_pae_xen_l2    0
+#endif
 /* Has this page been *partially* validated for use as its current type? */
 #define _PGT_partial      PG_shift(8)
 #define PGT_partial       PG_mask(1, 8)


[PATCH v2 3/5] x86: don't override INVALID_M2P_ENTRY with SHARED_M2P_ENTRY
Posted by Jan Beulich 3 years, 7 months ago
While in most cases code ahead of the invocation of set_gpfn_from_mfn()
deals with shared pages, at least in set_typed_p2m_entry() I can't spot
such handling (it's entirely possible there's code missing there). Let's
play it safe and add an extra check.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Re-base over set_gpfn_from_mfn() conversion to function.
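
(For illustration, not part of the patch: with SHARED_M2P_ENTRY being
~0UL - 1 (see asm-x86/mm.h) and INVALID_M2P_ENTRY being ~0UL, an
invalidation request must not be rewritten to "shared" merely because
the page is still owned by dom_cow. A standalone model of the
corrected ordering:)

#include <stdbool.h>
#include <stdio.h>

#define INVALID_M2P_ENTRY (~0UL)
#define SHARED_M2P_ENTRY  (~0UL - 1UL)

static unsigned long m2p_entry(unsigned long pfn, bool owner_is_dom_cow)
{
    unsigned long entry = pfn;

    /* Only translate to "shared" when the caller isn't invalidating. */
    if ( entry != INVALID_M2P_ENTRY && owner_is_dom_cow )
        entry = SHARED_M2P_ENTRY;

    return entry;
}

int main(void)
{
    printf("%lx\n", m2p_entry(INVALID_M2P_ENTRY, true)); /* stays invalid */
    printf("%lx\n", m2p_entry(0x1234UL, true));          /* becomes shared */
    return 0;
}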

--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1485,12 +1485,19 @@ destroy_frametable:
 
 void set_gpfn_from_mfn(unsigned long mfn, unsigned long pfn)
 {
-    const struct domain *d = page_get_owner(mfn_to_page(_mfn(mfn)));
-    unsigned long entry = (d && (d == dom_cow)) ? SHARED_M2P_ENTRY : pfn;
+    unsigned long entry = pfn;
 
     if ( unlikely(!machine_to_phys_mapping_valid) )
         return;
 
+    if ( entry != INVALID_M2P_ENTRY )
+    {
+        const struct domain *d = page_get_owner(mfn_to_page(_mfn(mfn)));
+
+        if ( d && (d == dom_cow) )
+            entry = SHARED_M2P_ENTRY;
+    }
+
     if ( opt_pv32 &&
          mfn < (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 )
         compat_machine_to_phys_mapping[mfn] = entry;


[PATCH v2 4/5] x86/PV: also check kernel endianness when building Dom0
Posted by Jan Beulich 3 years, 7 months ago
While big-endian x86 images are pretty unlikely to appear, merely
logging the endianness, rather than also rejecting big-endian
binaries, isn't of much use.

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.
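
(Aside, not part of the patch: the check amounts to inspecting byte
EI_DATA of the ELF identification, which is what a helper like
libelf's elf_msb() exposes. A standalone equivalent, using the
standard <elf.h> constants on a Linux system:)

#include <elf.h>
#include <stdbool.h>
#include <stdio.h>

static bool image_is_big_endian(const unsigned char e_ident[EI_NIDENT])
{
    return e_ident[EI_DATA] == ELFDATA2MSB;
}

int main(void)
{
    unsigned char ident[EI_NIDENT] = { 0x7f, 'E', 'L', 'F' };

    ident[EI_DATA] = ELFDATA2LSB;   /* a little-endian image */
    printf("big endian? %d\n", image_is_big_endian(ident));
    return 0;
}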

--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -288,7 +288,8 @@ int __init dom0_construct_pv(struct doma
                              module_t *initrd,
                              char *cmdline)
 {
-    int i, rc, compatible, order, machine;
+    int i, rc, order, machine;
+    bool compatible;
     struct cpu_user_regs *regs;
     unsigned long pfn, mfn;
     unsigned long nr_pages;
@@ -358,7 +359,7 @@ int __init dom0_construct_pv(struct doma
     /* compatibility check */
     printk(" Xen  kernel: 64-bit, lsb%s\n",
            IS_ENABLED(CONFIG_PV32) ? ", compat32" : "");
-    compatible = 0;
+    compatible = false;
     machine = elf_uval(&elf, elf.ehdr, e_machine);
 
 #ifdef CONFIG_PV32
@@ -374,13 +375,16 @@ int __init dom0_construct_pv(struct doma
                 return rc;
             }
 
-            compatible = 1;
+            compatible = true;
         }
     }
 #endif
 
     if ( elf_64bit(&elf) && machine == EM_X86_64 )
-        compatible = 1;
+        compatible = true;
+
+    if ( elf_msb(&elf) )
+        compatible = false;
 
     printk(" Dom0 kernel: %s-bit%s, %s, paddr %#" PRIx64 " -> %#" PRIx64 "\n",
            elf_64bit(&elf) ? "64" : elf_32bit(&elf) ? "32" : "??",


[PATCH v2 5/5] x86: simplify is_guest_l2_slot()
Posted by Jan Beulich 3 years, 7 months ago
is_pv_32bit_domain() has become expensive, and its use here is
redundant: Only 32-bit guests would ever get PGT_pae_xen_l2 set on
their L2 page table pages anyway.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.
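
(Aside, not part of the patch: with the !CONFIG_PV32 fallback from
patch 2, PGT_pae_xen_l2 is 0, so the simplified macro folds to a
compile-time true, while on PV32 builds the type bit alone already
implies a 32-bit guest. A standalone model with demo stand-in values;
the real ones live in asm-x86/mm.h:)

#include <stdbool.h>
#include <stdio.h>

#define DEMO_PGT_pae_xen_l2 (1UL << 7)
#define DEMO_FIRST_XEN_SLOT 3

static bool is_guest_l2_slot(unsigned long type, unsigned long pgt_bit,
                             unsigned int slot)
{
    /* With pgt_bit == 0 (the !CONFIG_PV32 case) this is always true. */
    return !(type & pgt_bit) || slot < DEMO_FIRST_XEN_SLOT;
}

int main(void)
{
    /* PV32 build: Xen slots of a pae_xen_l2 table aren't guest slots. */
    printf("%d\n", is_guest_l2_slot(DEMO_PGT_pae_xen_l2,
                                    DEMO_PGT_pae_xen_l2, 5));    /* 0 */
    /* !PV32 build: the bit is 0, so every slot is a guest slot. */
    printf("%d\n", is_guest_l2_slot(DEMO_PGT_pae_xen_l2, 0, 5)); /* 1 */
    return 0;
}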

--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -106,8 +106,7 @@ typedef l4_pgentry_t root_pgentry_t;
 #define l4_linear_offset(_a) (((_a) & VADDR_MASK) >> L4_PAGETABLE_SHIFT)
 
 #define is_guest_l2_slot(_d, _t, _s)                   \
-    ( !is_pv_32bit_domain(_d) ||                       \
-      !((_t) & PGT_pae_xen_l2) ||                      \
+    ( !((_t) & PGT_pae_xen_l2) ||                      \
       ((_s) < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_d)) )
 #define is_guest_l4_slot(_d, _s)                    \
     ( is_pv_32bit_domain(_d)                        \