As pointed out by Andrew, maintaining the compat M2P when !PV32 (or when "pv=no-32" was given) is a pointless waste of memory. Do away with this, with a possible plan to subsequently use the compat instance instead of the native one in shim mode with a 32-bit guest (requiring more intrusive rework, in particular to delay setting up the M2P until the bitness of the client domain is known). 1: convert set_gpfn_from_mfn() to a function 2: don't maintain compat M2P when !PV32 3: don't override INVALID_M2P_ENTRY with SHARED_M2P_ENTRY 4: PV: also check kernel endianness when building Dom0 5: simplify is_guest_l2_slot() Jan
It is already a little too heavy for a macro, and more logic is about to get added to it. This also allows reducing the scope of compat_machine_to_phys_mapping. Requested-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v2: New. --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -40,6 +40,8 @@ EMIT_FILE; #include <asm/mem_sharing.h> #include <public/memory.h> +#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START) + unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START; l2_pgentry_t *compat_idle_pg_table_l2; @@ -1454,6 +1456,20 @@ destroy_frametable: return ret; } +void set_gpfn_from_mfn(unsigned long mfn, unsigned long pfn) +{ + const struct domain *d = page_get_owner(mfn_to_page(_mfn(mfn))); + unsigned long entry = (d && (d == dom_cow)) ? SHARED_M2P_ENTRY : pfn; + + if ( unlikely(!machine_to_phys_mapping_valid) ) + return; + + if ( mfn < (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 ) + compat_machine_to_phys_mapping[mfn] = entry; + + machine_to_phys_mapping[mfn] = entry; +} + #include "compat/mm.c" /* --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -494,25 +494,13 @@ extern paddr_t mem_hotplug; #define SHARED_M2P_ENTRY (~0UL - 1UL) #define SHARED_M2P(_e) ((_e) == SHARED_M2P_ENTRY) -#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START) -#define _set_gpfn_from_mfn(mfn, pfn) ({ \ - struct domain *d = page_get_owner(mfn_to_page(_mfn(mfn))); \ - unsigned long entry = (d && (d == dom_cow)) ? \ - SHARED_M2P_ENTRY : (pfn); \ - ((void)((mfn) >= (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 || \ - (compat_machine_to_phys_mapping[(mfn)] = (unsigned int)(entry))), \ - machine_to_phys_mapping[(mfn)] = (entry)); \ - }) - /* * Disable some users of set_gpfn_from_mfn() (e.g., free_heap_pages()) until * the machine_to_phys_mapping is actually set up. */ extern bool machine_to_phys_mapping_valid; -#define set_gpfn_from_mfn(mfn, pfn) do { \ - if ( machine_to_phys_mapping_valid ) \ - _set_gpfn_from_mfn(mfn, pfn); \ -} while (0) + +void set_gpfn_from_mfn(unsigned long mfn, unsigned long pfn); extern struct rangeset *mmio_ro_ranges;
It's effectively unused in this case (as well as when "pv=no-32"). While touching their definitions anyway, also adjust section placement of m2p_compat_vstart and compat_idle_pg_table_l2. Similarly, while putting init_xen_pae_l2_slots() inside #ifdef, also move it to a PV-only source file. Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v2: Drop stray changes to setup_compat_m2p_table(). Re-base over set_gpfn_from_mfn() conversion to function. --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -764,8 +764,10 @@ int arch_domain_create(struct domain *d, } d->arch.emulation_flags = emflags; +#ifdef CONFIG_PV32 HYPERVISOR_COMPAT_VIRT_START(d) = is_pv_domain(d) ? __HYPERVISOR_COMPAT_VIRT_START : ~0u; +#endif if ( (rc = paging_domain_init(d)) != 0 ) goto fail; --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -1427,13 +1427,6 @@ static bool pae_xen_mappings_check(const return true; } -void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d) -{ - memcpy(&l2t[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], - compat_idle_pg_table_l2, - COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2t)); -} - static int promote_l2_table(struct page_info *page, unsigned long type) { struct domain *d = page_get_owner(page); --- a/xen/arch/x86/pv/mm.c +++ b/xen/arch/x86/pv/mm.c @@ -128,6 +128,15 @@ bool pv_map_ldt_shadow_page(unsigned int return true; } +#ifdef CONFIG_PV32 +void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d) +{ + memcpy(&l2t[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)], + compat_idle_pg_table_l2, + COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2t)); +} +#endif + /* * Local variables: * mode: C --- a/xen/arch/x86/x86_64/compat/mm.c +++ b/xen/arch/x86/x86_64/compat/mm.c @@ -103,6 +103,9 @@ int compat_arch_memory_op(unsigned long .max_mfn = MACH2PHYS_COMPAT_NR_ENTRIES(d) - 1 }; + if ( !opt_pv32 ) + return -EOPNOTSUPP; + if ( copy_to_guest(arg, &mapping, 1) ) rc = -EFAULT; @@ -115,6 +118,9 @@ int compat_arch_memory_op(unsigned long unsigned long limit; compat_pfn_t last_mfn; + if ( !opt_pv32 ) + return -EOPNOTSUPP; + if ( copy_from_guest(&xmml, arg, 1) ) return -EFAULT; --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -34,17 +34,31 @@ EMIT_FILE; #include <asm/fixmap.h> #include <asm/hypercall.h> #include <asm/msr.h> +#include <asm/pv/domain.h> #include <asm/setup.h> #include <asm/numa.h> #include <asm/mem_paging.h> #include <asm/mem_sharing.h> #include <public/memory.h> +#ifdef CONFIG_PV32 + #define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START) -unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START; +unsigned int __initdata m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START; + +l2_pgentry_t *__read_mostly compat_idle_pg_table_l2; + +#else /* !CONFIG_PV32 */ + +/* + * Declare the symbol such that (dead) code referencing it can be built + * without a lot of #ifdef-ary, but mark it fully const and don't define + * this symbol anywhere (relying on DCE by the compiler). + */ +extern const unsigned int *const compat_machine_to_phys_mapping; -l2_pgentry_t *compat_idle_pg_table_l2; +#endif /* CONFIG_PV32 */ void *do_page_walk(struct vcpu *v, unsigned long addr) { @@ -220,7 +234,8 @@ static void destroy_compat_m2p_mapping(s { unsigned long i, smap = info->spfn, emap = info->spfn; - if ( smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) ) + if ( !opt_pv32 || + smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) ) return; if ( emap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) ) @@ -328,7 +343,8 @@ static int setup_compat_m2p_table(struct * Notice: For hot-added memory, only range below m2p_compat_vstart * will be filled up (assuming memory is discontinous when booting). */ - if ((smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2)) ) + if ( !opt_pv32 || + (smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2)) ) return 0; if ( epfn > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) ) @@ -611,17 +627,24 @@ void __init paging_init(void) #undef MFN /* Create user-accessible L2 directory to map the MPT for compat guests. */ - if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL ) - goto nomem; - compat_idle_pg_table_l2 = l2_ro_mpt; - clear_page(l2_ro_mpt); - /* Allocate and map the compatibility mode machine-to-phys table. */ - mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1)); - if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START ) - mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START; - mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); - if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END ) - m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size; + if ( opt_pv32 ) + { + if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL ) + goto nomem; + compat_idle_pg_table_l2 = l2_ro_mpt; + clear_page(l2_ro_mpt); + + /* Allocate and map the compatibility mode machine-to-phys table. */ + mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1)); + if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START ) + mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START; + mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); + if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END ) + m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size; + } + else + mpt_size = 0; + #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int)) #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \ sizeof(*compat_machine_to_phys_mapping)) @@ -847,23 +870,24 @@ void __init subarch_init_memory(void) mfn_to_page(_mfn(m2p_start_mfn + i)), SHARE_ro); } - for ( v = RDWR_COMPAT_MPT_VIRT_START; - v != RDWR_COMPAT_MPT_VIRT_END; - v += 1 << L2_PAGETABLE_SHIFT ) - { - l3e = l3e_from_l4e(idle_pg_table[l4_table_offset(v)], - l3_table_offset(v)); - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) - continue; - l2e = l2e_from_l3e(l3e, l2_table_offset(v)); - if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) - continue; - m2p_start_mfn = l2e_get_pfn(l2e); + if ( opt_pv32 ) + for ( v = RDWR_COMPAT_MPT_VIRT_START; + v != RDWR_COMPAT_MPT_VIRT_END; + v += 1 << L2_PAGETABLE_SHIFT ) + { + l3e = l3e_from_l4e(idle_pg_table[l4_table_offset(v)], + l3_table_offset(v)); + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) + continue; + l2e = l2e_from_l3e(l3e, l2_table_offset(v)); + if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) + continue; + m2p_start_mfn = l2e_get_pfn(l2e); - for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) - share_xen_page_with_privileged_guests( - mfn_to_page(_mfn(m2p_start_mfn + i)), SHARE_ro); - } + for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + share_xen_page_with_privileged_guests( + mfn_to_page(_mfn(m2p_start_mfn + i)), SHARE_ro); + } /* Mark all of direct map NX if hardware supports it. */ if ( !cpu_has_nx ) @@ -935,6 +959,9 @@ long subarch_memory_op(unsigned long cmd break; case XENMEM_machphys_compat_mfn_list: + if ( !opt_pv32 ) + return -EOPNOTSUPP; + if ( copy_from_guest(&xmml, arg, 1) ) return -EFAULT; @@ -1464,7 +1491,8 @@ void set_gpfn_from_mfn(unsigned long mfn if ( unlikely(!machine_to_phys_mapping_valid) ) return; - if ( mfn < (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 ) + if ( opt_pv32 && + mfn < (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 ) compat_machine_to_phys_mapping[mfn] = entry; machine_to_phys_mapping[mfn] = entry; --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -142,7 +142,7 @@ extern unsigned char boot_edid_info[128] * 0xffff82c000000000 - 0xffff82cfffffffff [64GB, 2^36 bytes, PML4:261] * vmap()/ioremap()/fixmap area. * 0xffff82d000000000 - 0xffff82d03fffffff [1GB, 2^30 bytes, PML4:261] - * Compatibility machine-to-phys translation table. + * Compatibility machine-to-phys translation table (CONFIG_PV32). * 0xffff82d040000000 - 0xffff82d07fffffff [1GB, 2^30 bytes, PML4:261] * Xen text, static data, bss. #ifndef CONFIG_BIGMEM @@ -246,9 +246,18 @@ extern unsigned char boot_edid_info[128] #ifndef __ASSEMBLY__ +#ifdef CONFIG_PV32 + /* This is not a fixed value, just a lower limit. */ #define __HYPERVISOR_COMPAT_VIRT_START 0xF5800000 #define HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart) + +#else /* !CONFIG_PV32 */ + +#define HYPERVISOR_COMPAT_VIRT_START(d) ((void)(d), 0) + +#endif /* CONFIG_PV32 */ + #define MACH2PHYS_COMPAT_VIRT_START HYPERVISOR_COMPAT_VIRT_START #define MACH2PHYS_COMPAT_VIRT_END 0xFFE00000 #define MACH2PHYS_COMPAT_NR_ENTRIES(d) \ --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -307,7 +307,9 @@ struct arch_domain { struct page_info *perdomain_l3_pg; +#ifdef CONFIG_PV32 unsigned int hv_compat_vstart; +#endif /* Maximum physical-address bitwidth supported by this guest. */ unsigned int physaddr_bitsize; --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -42,8 +42,12 @@ #define _PGT_validated PG_shift(6) #define PGT_validated PG_mask(1, 6) /* PAE only: is this an L2 page directory containing Xen-private mappings? */ +#ifdef CONFIG_PV32 #define _PGT_pae_xen_l2 PG_shift(7) #define PGT_pae_xen_l2 PG_mask(1, 7) +#else +#define PGT_pae_xen_l2 0 +#endif /* Has this page been *partially* validated for use as its current type? */ #define _PGT_partial PG_shift(8) #define PGT_partial PG_mask(1, 8)
While in most cases code ahead of the invocation of set_gpfn_from_mfn() deals with shared pages, at least in set_typed_p2m_entry() I can't spot such handling (it's entirely possible there's code missing there). Let's try to play safe and add an extra check. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v2: Re-base over set_gpfn_from_mfn() conversion to function. --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -1485,12 +1485,19 @@ destroy_frametable: void set_gpfn_from_mfn(unsigned long mfn, unsigned long pfn) { - const struct domain *d = page_get_owner(mfn_to_page(_mfn(mfn))); - unsigned long entry = (d && (d == dom_cow)) ? SHARED_M2P_ENTRY : pfn; + unsigned long entry = pfn; if ( unlikely(!machine_to_phys_mapping_valid) ) return; + if ( entry != INVALID_M2P_ENTRY ) + { + const struct domain *d = page_get_owner(mfn_to_page(_mfn(mfn))); + + if ( d && (d == dom_cow) ) + entry = SHARED_M2P_ENTRY; + } + if ( opt_pv32 && mfn < (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 ) compat_machine_to_phys_mapping[mfn] = entry;
While big endian x86 images are pretty unlikely to appear, merely logging endianness isn't of much use. Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v2: New. --- a/xen/arch/x86/pv/dom0_build.c +++ b/xen/arch/x86/pv/dom0_build.c @@ -288,7 +288,8 @@ int __init dom0_construct_pv(struct doma module_t *initrd, char *cmdline) { - int i, rc, compatible, order, machine; + int i, rc, order, machine; + bool compatible; struct cpu_user_regs *regs; unsigned long pfn, mfn; unsigned long nr_pages; @@ -358,7 +359,7 @@ int __init dom0_construct_pv(struct doma /* compatibility check */ printk(" Xen kernel: 64-bit, lsb%s\n", IS_ENABLED(CONFIG_PV32) ? ", compat32" : ""); - compatible = 0; + compatible = false; machine = elf_uval(&elf, elf.ehdr, e_machine); #ifdef CONFIG_PV32 @@ -374,13 +375,16 @@ int __init dom0_construct_pv(struct doma return rc; } - compatible = 1; + compatible = true; } } #endif if ( elf_64bit(&elf) && machine == EM_X86_64 ) - compatible = 1; + compatible = true; + + if ( elf_msb(&elf) ) + compatible = false; printk(" Dom0 kernel: %s-bit%s, %s, paddr %#" PRIx64 " -> %#" PRIx64 "\n", elf_64bit(&elf) ? "64" : elf_32bit(&elf) ? "32" : "??",
is_pv_32bit_domain() has become expensive, and its use here is redundant: Only 32-bit guests would ever get PGT_pae_xen_l2 set on their L2 page table pages anyway. Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v2: New. --- a/xen/include/asm-x86/x86_64/page.h +++ b/xen/include/asm-x86/x86_64/page.h @@ -106,8 +106,7 @@ typedef l4_pgentry_t root_pgentry_t; #define l4_linear_offset(_a) (((_a) & VADDR_MASK) >> L4_PAGETABLE_SHIFT) #define is_guest_l2_slot(_d, _t, _s) \ - ( !is_pv_32bit_domain(_d) || \ - !((_t) & PGT_pae_xen_l2) || \ + ( !((_t) & PGT_pae_xen_l2) || \ ((_s) < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_d)) ) #define is_guest_l4_slot(_d, _s) \ ( is_pv_32bit_domain(_d) \
© 2016 - 2024 Red Hat, Inc.