:p
atchew
Login
Hello all, This series enables the Xen boot time allocator on Power by parsing the available memory regions from the firmware-provided device tree. Version 3 of the patch set greatly reduces the amount of code copied from ARM, instead opting to move things into common code where possible. Thanks, Shawn Shawn Anastasio (9): EFI: Introduce inline stub for efi_enabled on !X86 && !ARM xen/asm-generic: Introduce generic acpi.h xen/ppc: Introduce stub asm/static-shmem.h xen/ppc: Update setup.h with required definitions for bootfdt xen/device-tree: Move Arm's setup.c bootinfo functions to common xen/common: Move Arm's bootfdt.c to common xen/ppc: Enable bootfdt and boot allocator xen/ppc: mm-radix: Replace debug printing code with printk xen/ppc: mm-radix: Allocate all paging structures at runtime MAINTAINERS | 2 + xen/arch/arm/Makefile | 1 - xen/arch/arm/setup.c | 419 ---------------- xen/arch/ppc/include/asm/Makefile | 1 + xen/arch/ppc/include/asm/setup.h | 117 +++++ xen/arch/ppc/include/asm/static-shmem.h | 12 + xen/arch/ppc/mm-radix.c | 263 +++++----- xen/arch/ppc/setup.c | 21 +- xen/common/Makefile | 1 + xen/common/device-tree/Makefile | 2 + .../arm => common/device-tree}/bootfdt.c | 11 +- xen/common/device-tree/bootinfo.c | 469 ++++++++++++++++++ xen/include/asm-generic/acpi.h | 20 + xen/include/xen/efi.h | 8 + 14 files changed, 781 insertions(+), 566 deletions(-) create mode 100644 xen/arch/ppc/include/asm/static-shmem.h create mode 100644 xen/common/device-tree/Makefile rename xen/{arch/arm => common/device-tree}/bootfdt.c (97%) create mode 100644 xen/common/device-tree/bootinfo.c create mode 100644 xen/include/asm-generic/acpi.h -- 2.30.2
On architectures with no EFI support, define an inline stub implementation of efi_enabled in efi.h that always returns false. Suggested-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/include/xen/efi.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xen/include/xen/efi.h b/xen/include/xen/efi.h index XXXXXXX..XXXXXXX 100644 --- a/xen/include/xen/efi.h +++ b/xen/include/xen/efi.h @@ -XXX,XX +XXX,XX @@ union compat_pf_efi_info; struct xenpf_efi_runtime_call; struct compat_pf_efi_runtime_call; +#if defined(CONFIG_X86) || defined(CONFIG_ARM) bool efi_enabled(unsigned int feature); +#else +static inline bool efi_enabled(unsigned int feature) +{ + return false; +} +#endif + void efi_init_memory(void); bool efi_boot_mem_unused(unsigned long *start, unsigned long *end); bool efi_rs_using_pgtables(void); -- 2.30.2
Introduce a generic acpi.h header that provides the required definitions to allow files including xen/acpi.h to be compiled. The definitions were largely derived from the !CONFIG_ACPI parts of ARM's acpi.h. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/include/asm/Makefile | 1 + xen/include/asm-generic/acpi.h | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 xen/include/asm-generic/acpi.h diff --git a/xen/arch/ppc/include/asm/Makefile b/xen/arch/ppc/include/asm/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/include/asm/Makefile +++ b/xen/arch/ppc/include/asm/Makefile @@ -XXX,XX +XXX,XX @@ # SPDX-License-Identifier: GPL-2.0-only +generic-y += acpi.h generic-y += altp2m.h generic-y += device.h generic-y += div64.h diff --git a/xen/include/asm-generic/acpi.h b/xen/include/asm-generic/acpi.h new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/include/asm-generic/acpi.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GENERIC_ACPI_H +#define __ASM_GENERIC_ACPI_H + +#include <asm/page.h> +#include <xen/types.h> + +#ifdef CONFIG_ACPI +#error "asm-generic acpi.h can't be used with CONFIG_ACPI set" +#endif + +#define COMPILER_DEPENDENT_INT64 int64_t +#define COMPILER_DEPENDENT_UINT64 uint64_t +#define ACPI_MAP_MEM_ATTR PAGE_HYPERVISOR + +#define acpi_disabled (true) +#define disable_acpi() +#define enable_acpi() + +#endif /* __ASM_GENERIC_ACPI_H */ -- 2.30.2
Required for bootfdt.c to build. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/include/asm/static-shmem.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 xen/arch/ppc/include/asm/static-shmem.h diff --git a/xen/arch/ppc/include/asm/static-shmem.h b/xen/arch/ppc/include/asm/static-shmem.h new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/arch/ppc/include/asm/static-shmem.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_PPC_STATIC_SHMEM_H__ +#define __ASM_PPC_STATIC_SHMEM_H__ + +static inline int process_shm_node(const void *fdt, int node, + uint32_t address_cells, uint32_t size_cells) +{ + return -EINVAL; +} + +#endif /* __ASM_PPC_STATIC_SHMEM_H__ */ -- 2.30.2
Add the definitions used by ARM's bootfdt.c, which will be moved into xen/common in a later patch, to PPC's setup.h. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/include/asm/setup.h | 112 +++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/xen/arch/ppc/include/asm/setup.h b/xen/arch/ppc/include/asm/setup.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/include/asm/setup.h +++ b/xen/arch/ppc/include/asm/setup.h @@ -XXX,XX +XXX,XX @@ #define max_init_domid (0) +#include <public/version.h> +#include <asm/p2m.h> +#include <xen/device_tree.h> + +#define MIN_FDT_ALIGN 8 +#define MAX_FDT_SIZE SZ_2M + +#define NR_MEM_BANKS 256 + +#define MAX_MODULES 32 /* Current maximum useful modules */ + +typedef enum { + BOOTMOD_XEN, + BOOTMOD_FDT, + BOOTMOD_KERNEL, + BOOTMOD_RAMDISK, + BOOTMOD_XSM, + BOOTMOD_GUEST_DTB, + BOOTMOD_UNKNOWN +} bootmodule_kind; + +enum membank_type { + /* + * The MEMBANK_DEFAULT type refers to either reserved memory for the + * device/firmware (when the bank is in 'reserved_mem') or any RAM (when + * the bank is in 'mem'). + */ + MEMBANK_DEFAULT, + /* + * The MEMBANK_STATIC_DOMAIN type is used to indicate whether the memory + * bank is bound to a static Xen domain. It is only valid when the bank + * is in reserved_mem. + */ + MEMBANK_STATIC_DOMAIN, + /* + * The MEMBANK_STATIC_HEAP type is used to indicate whether the memory + * bank is reserved as static heap. It is only valid when the bank is + * in reserved_mem. + */ + MEMBANK_STATIC_HEAP, +}; + +/* Indicates the maximum number of characters(\0 included) for shm_id */ +#define MAX_SHM_ID_LENGTH 16 + +struct membank { + paddr_t start; + paddr_t size; + enum membank_type type; +}; + +struct meminfo { + unsigned int nr_banks; + struct membank bank[NR_MEM_BANKS]; +}; + +/* + * The domU flag is set for kernels and ramdisks of "xen,domain" nodes. + * The purpose of the domU flag is to avoid getting confused in + * kernel_probe, where we try to guess which is the dom0 kernel and + * initrd to be compatible with all versions of the multiboot spec. + */ +#define BOOTMOD_MAX_CMDLINE 1024 +struct bootmodule { + bootmodule_kind kind; + bool domU; + paddr_t start; + paddr_t size; +}; + +/* DT_MAX_NAME is the node name max length according the DT spec */ +#define DT_MAX_NAME 41 +struct bootcmdline { + bootmodule_kind kind; + bool domU; + paddr_t start; + char dt_name[DT_MAX_NAME]; + char cmdline[BOOTMOD_MAX_CMDLINE]; +}; + +struct bootmodules { + int nr_mods; + struct bootmodule module[MAX_MODULES]; +}; + +struct bootcmdlines { + unsigned int nr_mods; + struct bootcmdline cmdline[MAX_MODULES]; +}; + +struct bootinfo { + struct meminfo mem; + struct meminfo reserved_mem; + struct bootmodules modules; + struct bootcmdlines cmdlines; + bool static_heap; +}; + +extern struct bootinfo bootinfo; + +/* + * bootinfo.c + */ +bool check_reserved_regions_overlap(paddr_t region_start, paddr_t region_size); +struct bootmodule *add_boot_module(bootmodule_kind kind, + paddr_t start, paddr_t size, bool domU); +void add_boot_cmdline(const char *name, const char *cmdline, + bootmodule_kind kind, paddr_t start, bool domU); +const char *boot_module_kind_as_string(bootmodule_kind kind); +struct bootcmdline *boot_cmdline_find_by_kind(bootmodule_kind kind); +void populate_boot_allocator(void); + #endif /* __ASM_PPC_SETUP_H__ */ -- 2.30.2
Arm's setup.c contains a collection of functions for parsing memory map and other boot information from a device tree. Since these routines are generally useful on any architecture that supports device tree booting, move them into xen/common/device-tree. Suggested-by: Julien Grall <julien@xen.org> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- MAINTAINERS | 1 + xen/arch/arm/setup.c | 419 -------------------------- xen/common/Makefile | 1 + xen/common/device-tree/Makefile | 1 + xen/common/device-tree/bootinfo.c | 469 ++++++++++++++++++++++++++++++ 5 files changed, 472 insertions(+), 419 deletions(-) create mode 100644 xen/common/device-tree/Makefile create mode 100644 xen/common/device-tree/bootinfo.c diff --git a/MAINTAINERS b/MAINTAINERS index XXXXXXX..XXXXXXX 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -XXX,XX +XXX,XX @@ M: Stefano Stabellini <sstabellini@kernel.org> M: Julien Grall <julien@xen.org> S: Supported F: xen/common/libfdt/ +F: xen/common/device-tree/setup.c F: xen/common/device_tree.c F: xen/common/dt-overlay.c F: xen/include/xen/libfdt/ diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/arm/setup.c +++ b/xen/arch/arm/setup.c @@ -XXX,XX +XXX,XX @@ #include <xsm/xsm.h> #include <asm/acpi.h> -struct bootinfo __initdata bootinfo; - /* * Sanitized version of cpuinfo containing only features available on all * cores (only on arm64 as there is no sanitization support on arm32). @@ -XXX,XX +XXX,XX @@ static void __init processor_id(void) processor_setup(); } -static void __init dt_unreserved_regions(paddr_t s, paddr_t e, - void (*cb)(paddr_t ps, paddr_t pe), - unsigned int first) -{ - unsigned int i, nr; - int rc; - - rc = fdt_num_mem_rsv(device_tree_flattened); - if ( rc < 0 ) - panic("Unable to retrieve the number of reserved regions (rc=%d)\n", - rc); - - nr = rc; - - for ( i = first; i < nr ; i++ ) - { - paddr_t r_s, r_e; - - if ( fdt_get_mem_rsv_paddr(device_tree_flattened, i, &r_s, &r_e ) < 0 ) - /* If we can't read it, pretend it doesn't exist... */ - continue; - - r_e += r_s; /* fdt_get_mem_rsv_paddr returns length */ - - if ( s < r_e && r_s < e ) - { - dt_unreserved_regions(r_e, e, cb, i+1); - dt_unreserved_regions(s, r_s, cb, i+1); - return; - } - } - - /* - * i is the current bootmodule we are evaluating across all possible - * kinds. - * - * When retrieving the corresponding reserved-memory addresses - * below, we need to index the bootinfo.reserved_mem bank starting - * from 0, and only counting the reserved-memory modules. Hence, - * we need to use i - nr. - */ - for ( ; i - nr < bootinfo.reserved_mem.nr_banks; i++ ) - { - paddr_t r_s = bootinfo.reserved_mem.bank[i - nr].start; - paddr_t r_e = r_s + bootinfo.reserved_mem.bank[i - nr].size; - - if ( s < r_e && r_s < e ) - { - dt_unreserved_regions(r_e, e, cb, i + 1); - dt_unreserved_regions(s, r_s, cb, i + 1); - return; - } - } - - cb(s, e); -} - -/* - * TODO: '*_end' could be 0 if the bank/region is at the end of the physical - * address space. This is for now not handled as it requires more rework. - */ -static bool __init meminfo_overlap_check(struct meminfo *meminfo, - paddr_t region_start, - paddr_t region_size) -{ - paddr_t bank_start = INVALID_PADDR, bank_end = 0; - paddr_t region_end = region_start + region_size; - unsigned int i, bank_num = meminfo->nr_banks; - - for ( i = 0; i < bank_num; i++ ) - { - bank_start = meminfo->bank[i].start; - bank_end = bank_start + meminfo->bank[i].size; - - if ( region_end <= bank_start || region_start >= bank_end ) - continue; - else - { - printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with bank[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", - region_start, region_end, i, bank_start, bank_end); - return true; - } - } - - return false; -} - -/* - * TODO: '*_end' could be 0 if the module/region is at the end of the physical - * address space. This is for now not handled as it requires more rework. - */ -static bool __init bootmodules_overlap_check(struct bootmodules *bootmodules, - paddr_t region_start, - paddr_t region_size) -{ - paddr_t mod_start = INVALID_PADDR, mod_end = 0; - paddr_t region_end = region_start + region_size; - unsigned int i, mod_num = bootmodules->nr_mods; - - for ( i = 0; i < mod_num; i++ ) - { - mod_start = bootmodules->module[i].start; - mod_end = mod_start + bootmodules->module[i].size; - - if ( region_end <= mod_start || region_start >= mod_end ) - continue; - else - { - printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with mod[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", - region_start, region_end, i, mod_start, mod_end); - return true; - } - } - - return false; -} - -void __init fw_unreserved_regions(paddr_t s, paddr_t e, - void (*cb)(paddr_t ps, paddr_t pe), - unsigned int first) -{ - if ( acpi_disabled ) - dt_unreserved_regions(s, e, cb, first); - else - cb(s, e); -} - -/* - * Given an input physical address range, check if this range is overlapping - * with the existing reserved memory regions defined in bootinfo. - * Return true if the input physical address range is overlapping with any - * existing reserved memory regions, otherwise false. - */ -bool __init check_reserved_regions_overlap(paddr_t region_start, - paddr_t region_size) -{ - /* Check if input region is overlapping with bootinfo.reserved_mem banks */ - if ( meminfo_overlap_check(&bootinfo.reserved_mem, - region_start, region_size) ) - return true; - - /* Check if input region is overlapping with bootmodules */ - if ( bootmodules_overlap_check(&bootinfo.modules, - region_start, region_size) ) - return true; - -#ifdef CONFIG_ACPI - /* Check if input region is overlapping with ACPI EfiACPIReclaimMemory */ - if ( meminfo_overlap_check(&bootinfo.acpi, region_start, region_size) ) - return true; -#endif - - return false; -} - -struct bootmodule __init *add_boot_module(bootmodule_kind kind, - paddr_t start, paddr_t size, - bool domU) -{ - struct bootmodules *mods = &bootinfo.modules; - struct bootmodule *mod; - unsigned int i; - - if ( mods->nr_mods == MAX_MODULES ) - { - printk("Ignoring %s boot module at %"PRIpaddr"-%"PRIpaddr" (too many)\n", - boot_module_kind_as_string(kind), start, start + size); - return NULL; - } - - if ( check_reserved_regions_overlap(start, size) ) - return NULL; - - for ( i = 0 ; i < mods->nr_mods ; i++ ) - { - mod = &mods->module[i]; - if ( mod->kind == kind && mod->start == start ) - { - if ( !domU ) - mod->domU = false; - return mod; - } - } - - mod = &mods->module[mods->nr_mods++]; - mod->kind = kind; - mod->start = start; - mod->size = size; - mod->domU = domU; - - return mod; -} - -/* - * boot_module_find_by_kind can only be used to return Xen modules (e.g - * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest - * modules. - */ -struct bootmodule * __init boot_module_find_by_kind(bootmodule_kind kind) -{ - struct bootmodules *mods = &bootinfo.modules; - struct bootmodule *mod; - int i; - for (i = 0 ; i < mods->nr_mods ; i++ ) - { - mod = &mods->module[i]; - if ( mod->kind == kind && !mod->domU ) - return mod; - } - return NULL; -} - -void __init add_boot_cmdline(const char *name, const char *cmdline, - bootmodule_kind kind, paddr_t start, bool domU) -{ - struct bootcmdlines *cmds = &bootinfo.cmdlines; - struct bootcmdline *cmd; - - if ( cmds->nr_mods == MAX_MODULES ) - { - printk("Ignoring %s cmdline (too many)\n", name); - return; - } - - cmd = &cmds->cmdline[cmds->nr_mods++]; - cmd->kind = kind; - cmd->domU = domU; - cmd->start = start; - - ASSERT(strlen(name) <= DT_MAX_NAME); - safe_strcpy(cmd->dt_name, name); - - if ( strlen(cmdline) > BOOTMOD_MAX_CMDLINE ) - panic("module %s command line too long\n", name); - safe_strcpy(cmd->cmdline, cmdline); -} - -/* - * boot_cmdline_find_by_kind can only be used to return Xen modules (e.g - * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest - * modules. - */ -struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind) -{ - struct bootcmdlines *cmds = &bootinfo.cmdlines; - struct bootcmdline *cmd; - int i; - - for ( i = 0 ; i < cmds->nr_mods ; i++ ) - { - cmd = &cmds->cmdline[i]; - if ( cmd->kind == kind && !cmd->domU ) - return cmd; - } - return NULL; -} - -struct bootcmdline * __init boot_cmdline_find_by_name(const char *name) -{ - struct bootcmdlines *mods = &bootinfo.cmdlines; - struct bootcmdline *mod; - unsigned int i; - - for (i = 0 ; i < mods->nr_mods ; i++ ) - { - mod = &mods->cmdline[i]; - if ( strcmp(mod->dt_name, name) == 0 ) - return mod; - } - return NULL; -} - -struct bootmodule * __init boot_module_find_by_addr_and_kind(bootmodule_kind kind, - paddr_t start) -{ - struct bootmodules *mods = &bootinfo.modules; - struct bootmodule *mod; - unsigned int i; - - for (i = 0 ; i < mods->nr_mods ; i++ ) - { - mod = &mods->module[i]; - if ( mod->kind == kind && mod->start == start ) - return mod; - } - return NULL; -} - -const char * __init boot_module_kind_as_string(bootmodule_kind kind) -{ - switch ( kind ) - { - case BOOTMOD_XEN: return "Xen"; - case BOOTMOD_FDT: return "Device Tree"; - case BOOTMOD_KERNEL: return "Kernel"; - case BOOTMOD_RAMDISK: return "Ramdisk"; - case BOOTMOD_XSM: return "XSM"; - case BOOTMOD_GUEST_DTB: return "DTB"; - case BOOTMOD_UNKNOWN: return "Unknown"; - default: BUG(); - } -} - void __init discard_initial_modules(void) { struct bootmodules *mi = &bootinfo.modules; @@ -XXX,XX +XXX,XX @@ static void * __init relocate_fdt(paddr_t dtb_paddr, size_t dtb_size) return fdt; } -/* - * Return the end of the non-module region starting at s. In other - * words return s the start of the next modules after s. - * - * On input *end is the end of the region which should be considered - * and it is updated to reflect the end of the module, clipped to the - * end of the region if it would run over. - */ -static paddr_t __init next_module(paddr_t s, paddr_t *end) -{ - struct bootmodules *mi = &bootinfo.modules; - paddr_t lowest = ~(paddr_t)0; - int i; - - for ( i = 0; i < mi->nr_mods; i++ ) - { - paddr_t mod_s = mi->module[i].start; - paddr_t mod_e = mod_s + mi->module[i].size; - - if ( !mi->module[i].size ) - continue; - - if ( mod_s < s ) - continue; - if ( mod_s > lowest ) - continue; - if ( mod_s > *end ) - continue; - lowest = mod_s; - *end = min(*end, mod_e); - } - return lowest; -} - void __init init_pdx(void) { paddr_t bank_start, bank_size, bank_end; @@ -XXX,XX +XXX,XX @@ void __init init_pdx(void) } } -/* - * Populate the boot allocator. - * If a static heap was not provided by the admin, all the RAM but the - * following regions will be added: - * - Modules (e.g., Xen, Kernel) - * - Reserved regions - * - Xenheap (arm32 only) - * If a static heap was provided by the admin, populate the boot - * allocator with the corresponding regions only, but with Xenheap excluded - * on arm32. - */ -void __init populate_boot_allocator(void) -{ - unsigned int i; - const struct meminfo *banks = &bootinfo.mem; - paddr_t s, e; - - if ( bootinfo.static_heap ) - { - for ( i = 0 ; i < bootinfo.reserved_mem.nr_banks; i++ ) - { - if ( bootinfo.reserved_mem.bank[i].type != MEMBANK_STATIC_HEAP ) - continue; - - s = bootinfo.reserved_mem.bank[i].start; - e = s + bootinfo.reserved_mem.bank[i].size; -#ifdef CONFIG_ARM_32 - /* Avoid the xenheap, note that the xenheap cannot across a bank */ - if ( s <= mfn_to_maddr(directmap_mfn_start) && - e >= mfn_to_maddr(directmap_mfn_end) ) - { - init_boot_pages(s, mfn_to_maddr(directmap_mfn_start)); - init_boot_pages(mfn_to_maddr(directmap_mfn_end), e); - } - else -#endif - init_boot_pages(s, e); - } - - return; - } - - for ( i = 0; i < banks->nr_banks; i++ ) - { - const struct membank *bank = &banks->bank[i]; - paddr_t bank_end = bank->start + bank->size; - - s = bank->start; - while ( s < bank_end ) - { - paddr_t n = bank_end; - - e = next_module(s, &n); - - if ( e == ~(paddr_t)0 ) - e = n = bank_end; - - /* - * Module in a RAM bank other than the one which we are - * not dealing with here. - */ - if ( e > bank_end ) - e = bank_end; - -#ifdef CONFIG_ARM_32 - /* Avoid the xenheap */ - if ( s < mfn_to_maddr(directmap_mfn_end) && - mfn_to_maddr(directmap_mfn_start) < e ) - { - e = mfn_to_maddr(directmap_mfn_start); - n = mfn_to_maddr(directmap_mfn_end); - } -#endif - - fw_unreserved_regions(s, e, init_boot_pages, 0); - s = n; - } - } -} - size_t __read_mostly dcache_line_bytes; /* C entry point for boot CPU */ diff --git a/xen/common/Makefile b/xen/common/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_UBSAN) += ubsan/ obj-$(CONFIG_NEEDS_LIBELF) += libelf/ obj-$(CONFIG_HAS_DEVICE_TREE) += libfdt/ +obj-$(CONFIG_HAS_DEVICE_TREE) += device-tree/ CONF_FILE := $(if $(patsubst /%,,$(KCONFIG_CONFIG)),$(objtree)/)$(KCONFIG_CONFIG) $(obj)/config.gz: $(CONF_FILE) diff --git a/xen/common/device-tree/Makefile b/xen/common/device-tree/Makefile new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/common/device-tree/Makefile @@ -0,0 +1 @@ +obj-y += bootinfo.o diff --git a/xen/common/device-tree/bootinfo.c b/xen/common/device-tree/bootinfo.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/common/device-tree/bootinfo.c @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Derived from $xen/arch/arm/setup.c. + * + * Early device tree parsing and bookkeeping routines. + * + * Tim Deegan <tim@xen.org> + * Copyright (c) 2011 Citrix Systems. + * Copyright (c) 2024 Raptor Engineering LLC + */ + +#include <xen/compile.h> +#include <xen/errno.h> +#include <xen/device_tree.h> +#include <xen/domain_page.h> +#include <xen/grant_table.h> +#include <xen/types.h> +#include <xen/string.h> +#include <xen/serial.h> +#include <xen/sched.h> +#include <xen/console.h> +#include <xen/err.h> +#include <xen/init.h> +#include <xen/irq.h> +#include <xen/mm.h> +#include <xen/param.h> +#include <xen/softirq.h> +#include <xen/keyhandler.h> +#include <xen/cpu.h> +#include <xen/pfn.h> +#include <xen/virtual_region.h> +#include <xen/vmap.h> +#include <xen/trace.h> +#include <xen/libfdt/libfdt-xen.h> +#include <xen/acpi.h> +#include <xen/warning.h> +#include <xen/hypercall.h> +#include <asm/page.h> +#include <asm/current.h> +#include <asm/setup.h> +#include <asm/setup.h> + +struct bootinfo __initdata bootinfo; + +const char * __init boot_module_kind_as_string(bootmodule_kind kind) +{ + switch ( kind ) + { + case BOOTMOD_XEN: return "Xen"; + case BOOTMOD_FDT: return "Device Tree"; + case BOOTMOD_KERNEL: return "Kernel"; + case BOOTMOD_RAMDISK: return "Ramdisk"; + case BOOTMOD_XSM: return "XSM"; + case BOOTMOD_GUEST_DTB: return "DTB"; + case BOOTMOD_UNKNOWN: return "Unknown"; + default: BUG(); + } +} + +static void __init dt_unreserved_regions(paddr_t s, paddr_t e, + void (*cb)(paddr_t ps, paddr_t pe), + unsigned int first) +{ + unsigned int i, nr; + int rc; + + rc = fdt_num_mem_rsv(device_tree_flattened); + if ( rc < 0 ) + panic("Unable to retrieve the number of reserved regions (rc=%d)\n", + rc); + + nr = rc; + + for ( i = first; i < nr ; i++ ) + { + paddr_t r_s, r_e; + + if ( fdt_get_mem_rsv_paddr(device_tree_flattened, i, &r_s, &r_e ) < 0 ) + /* If we can't read it, pretend it doesn't exist... */ + continue; + + r_e += r_s; /* fdt_get_mem_rsv_paddr returns length */ + + if ( s < r_e && r_s < e ) + { + dt_unreserved_regions(r_e, e, cb, i+1); + dt_unreserved_regions(s, r_s, cb, i+1); + return; + } + } + + /* + * i is the current bootmodule we are evaluating across all possible + * kinds. + * + * When retrieving the corresponding reserved-memory addresses + * below, we need to index the bootinfo.reserved_mem bank starting + * from 0, and only counting the reserved-memory modules. Hence, + * we need to use i - nr. + */ + for ( ; i - nr < bootinfo.reserved_mem.nr_banks; i++ ) + { + paddr_t r_s = bootinfo.reserved_mem.bank[i - nr].start; + paddr_t r_e = r_s + bootinfo.reserved_mem.bank[i - nr].size; + + if ( s < r_e && r_s < e ) + { + dt_unreserved_regions(r_e, e, cb, i + 1); + dt_unreserved_regions(s, r_s, cb, i + 1); + return; + } + } + + cb(s, e); +} + +/* + * TODO: '*_end' could be 0 if the bank/region is at the end of the physical + * address space. This is for now not handled as it requires more rework. + */ +static bool __init meminfo_overlap_check(struct meminfo *meminfo, + paddr_t region_start, + paddr_t region_size) +{ + paddr_t bank_start = INVALID_PADDR, bank_end = 0; + paddr_t region_end = region_start + region_size; + unsigned int i, bank_num = meminfo->nr_banks; + + for ( i = 0; i < bank_num; i++ ) + { + bank_start = meminfo->bank[i].start; + bank_end = bank_start + meminfo->bank[i].size; + + if ( region_end <= bank_start || region_start >= bank_end ) + continue; + else + { + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with bank[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", + region_start, region_end, i, bank_start, bank_end); + return true; + } + } + + return false; +} + +/* + * TODO: '*_end' could be 0 if the module/region is at the end of the physical + * address space. This is for now not handled as it requires more rework. + */ +static bool __init bootmodules_overlap_check(struct bootmodules *bootmodules, + paddr_t region_start, + paddr_t region_size) +{ + paddr_t mod_start = INVALID_PADDR, mod_end = 0; + paddr_t region_end = region_start + region_size; + unsigned int i, mod_num = bootmodules->nr_mods; + + for ( i = 0; i < mod_num; i++ ) + { + mod_start = bootmodules->module[i].start; + mod_end = mod_start + bootmodules->module[i].size; + + if ( region_end <= mod_start || region_start >= mod_end ) + continue; + else + { + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with mod[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", + region_start, region_end, i, mod_start, mod_end); + return true; + } + } + + return false; +} + +void __init fw_unreserved_regions(paddr_t s, paddr_t e, + void (*cb)(paddr_t ps, paddr_t pe), + unsigned int first) +{ + if ( acpi_disabled ) + dt_unreserved_regions(s, e, cb, first); + else + cb(s, e); +} + +/* + * Given an input physical address range, check if this range is overlapping + * with the existing reserved memory regions defined in bootinfo. + * Return true if the input physical address range is overlapping with any + * existing reserved memory regions, otherwise false. + */ +bool __init check_reserved_regions_overlap(paddr_t region_start, + paddr_t region_size) +{ + /* Check if input region is overlapping with bootinfo.reserved_mem banks */ + if ( meminfo_overlap_check(&bootinfo.reserved_mem, + region_start, region_size) ) + return true; + + /* Check if input region is overlapping with bootmodules */ + if ( bootmodules_overlap_check(&bootinfo.modules, + region_start, region_size) ) + return true; + +#ifdef CONFIG_ACPI + /* Check if input region is overlapping with ACPI EfiACPIReclaimMemory */ + if ( meminfo_overlap_check(&bootinfo.acpi, region_start, region_size) ) + return true; +#endif + + return false; +} + +struct bootmodule __init *add_boot_module(bootmodule_kind kind, + paddr_t start, paddr_t size, + bool domU) +{ + struct bootmodules *mods = &bootinfo.modules; + struct bootmodule *mod; + unsigned int i; + + if ( mods->nr_mods == MAX_MODULES ) + { + printk("Ignoring %s boot module at %"PRIpaddr"-%"PRIpaddr" (too many)\n", + boot_module_kind_as_string(kind), start, start + size); + return NULL; + } + + if ( check_reserved_regions_overlap(start, size) ) + return NULL; + + for ( i = 0 ; i < mods->nr_mods ; i++ ) + { + mod = &mods->module[i]; + if ( mod->kind == kind && mod->start == start ) + { + if ( !domU ) + mod->domU = false; + return mod; + } + } + + mod = &mods->module[mods->nr_mods++]; + mod->kind = kind; + mod->start = start; + mod->size = size; + mod->domU = domU; + + return mod; +} + +/* + * boot_module_find_by_kind can only be used to return Xen modules (e.g + * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest + * modules. + */ +struct bootmodule * __init boot_module_find_by_kind(bootmodule_kind kind) +{ + struct bootmodules *mods = &bootinfo.modules; + struct bootmodule *mod; + int i; + for (i = 0 ; i < mods->nr_mods ; i++ ) + { + mod = &mods->module[i]; + if ( mod->kind == kind && !mod->domU ) + return mod; + } + return NULL; +} + +void __init add_boot_cmdline(const char *name, const char *cmdline, + bootmodule_kind kind, paddr_t start, bool domU) +{ + struct bootcmdlines *cmds = &bootinfo.cmdlines; + struct bootcmdline *cmd; + + if ( cmds->nr_mods == MAX_MODULES ) + { + printk("Ignoring %s cmdline (too many)\n", name); + return; + } + + cmd = &cmds->cmdline[cmds->nr_mods++]; + cmd->kind = kind; + cmd->domU = domU; + cmd->start = start; + + ASSERT(strlen(name) <= DT_MAX_NAME); + safe_strcpy(cmd->dt_name, name); + + if ( strlen(cmdline) > BOOTMOD_MAX_CMDLINE ) + panic("module %s command line too long\n", name); + safe_strcpy(cmd->cmdline, cmdline); +} + +/* + * boot_cmdline_find_by_kind can only be used to return Xen modules (e.g + * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest + * modules. + */ +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind) +{ + struct bootcmdlines *cmds = &bootinfo.cmdlines; + struct bootcmdline *cmd; + int i; + + for ( i = 0 ; i < cmds->nr_mods ; i++ ) + { + cmd = &cmds->cmdline[i]; + if ( cmd->kind == kind && !cmd->domU ) + return cmd; + } + return NULL; +} + +struct bootcmdline * __init boot_cmdline_find_by_name(const char *name) +{ + struct bootcmdlines *mods = &bootinfo.cmdlines; + struct bootcmdline *mod; + unsigned int i; + + for (i = 0 ; i < mods->nr_mods ; i++ ) + { + mod = &mods->cmdline[i]; + if ( strcmp(mod->dt_name, name) == 0 ) + return mod; + } + return NULL; +} + +struct bootmodule * __init boot_module_find_by_addr_and_kind(bootmodule_kind kind, + paddr_t start) +{ + struct bootmodules *mods = &bootinfo.modules; + struct bootmodule *mod; + unsigned int i; + + for (i = 0 ; i < mods->nr_mods ; i++ ) + { + mod = &mods->module[i]; + if ( mod->kind == kind && mod->start == start ) + return mod; + } + return NULL; +} + +/* + * Return the end of the non-module region starting at s. In other + * words return s the start of the next modules after s. + * + * On input *end is the end of the region which should be considered + * and it is updated to reflect the end of the module, clipped to the + * end of the region if it would run over. + */ +static paddr_t __init next_module(paddr_t s, paddr_t *end) +{ + struct bootmodules *mi = &bootinfo.modules; + paddr_t lowest = ~(paddr_t)0; + int i; + + for ( i = 0; i < mi->nr_mods; i++ ) + { + paddr_t mod_s = mi->module[i].start; + paddr_t mod_e = mod_s + mi->module[i].size; + + if ( !mi->module[i].size ) + continue; + + if ( mod_s < s ) + continue; + if ( mod_s > lowest ) + continue; + if ( mod_s > *end ) + continue; + lowest = mod_s; + *end = min(*end, mod_e); + } + return lowest; +} + +/* + * Populate the boot allocator. + * If a static heap was not provided by the admin, all the RAM but the + * following regions will be added: + * - Modules (e.g., Xen, Kernel) + * - Reserved regions + * - Xenheap (arm32 only) + * If a static heap was provided by the admin, populate the boot + * allocator with the corresponding regions only, but with Xenheap excluded + * on arm32. + */ +void __init populate_boot_allocator(void) +{ + unsigned int i; + const struct meminfo *banks = &bootinfo.mem; + paddr_t s, e; + + if ( bootinfo.static_heap ) + { + for ( i = 0 ; i < bootinfo.reserved_mem.nr_banks; i++ ) + { + if ( bootinfo.reserved_mem.bank[i].type != MEMBANK_STATIC_HEAP ) + continue; + + s = bootinfo.reserved_mem.bank[i].start; + e = s + bootinfo.reserved_mem.bank[i].size; +#ifdef CONFIG_ARM_32 + /* Avoid the xenheap, note that the xenheap cannot across a bank */ + if ( s <= mfn_to_maddr(directmap_mfn_start) && + e >= mfn_to_maddr(directmap_mfn_end) ) + { + init_boot_pages(s, mfn_to_maddr(directmap_mfn_start)); + init_boot_pages(mfn_to_maddr(directmap_mfn_end), e); + } + else +#endif + init_boot_pages(s, e); + } + + return; + } + + for ( i = 0; i < banks->nr_banks; i++ ) + { + const struct membank *bank = &banks->bank[i]; + paddr_t bank_end = bank->start + bank->size; + + s = bank->start; + while ( s < bank_end ) + { + paddr_t n = bank_end; + + e = next_module(s, &n); + + if ( e == ~(paddr_t)0 ) + e = n = bank_end; + + /* + * Module in a RAM bank other than the one which we are + * not dealing with here. + */ + if ( e > bank_end ) + e = bank_end; + +#ifdef CONFIG_ARM_32 + /* Avoid the xenheap */ + if ( s < mfn_to_maddr(directmap_mfn_end) && + mfn_to_maddr(directmap_mfn_start) < e ) + { + e = mfn_to_maddr(directmap_mfn_start); + n = mfn_to_maddr(directmap_mfn_end); + } +#endif + + fw_unreserved_regions(s, e, init_boot_pages, 0); + s = n; + } + } +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ -- 2.30.2
Move Arm's bootfdt.c to xen/common so that it can be used by other device tree architectures like PPC and RISCV. Suggested-by: Julien Grall <julien@xen.org> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Acked-by: Julien Grall <julien@xen.org> --- Changes in v2: - Drop #if defined(CONFIG_ARM_EFI) now that efi_enabled is stubbed MAINTAINERS | 1 + xen/arch/arm/Makefile | 1 - xen/common/device-tree/Makefile | 1 + xen/{arch/arm => common/device-tree}/bootfdt.c | 0 4 files changed, 2 insertions(+), 1 deletion(-) rename xen/{arch/arm => common/device-tree}/bootfdt.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index XXXXXXX..XXXXXXX 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -XXX,XX +XXX,XX @@ S: Supported L: xen-devel@lists.xenproject.org F: docs/misc/arm/ F: xen/arch/arm/ +F: xen/common/device-tree/bootfdt.c F: xen/drivers/char/arm-uart.c F: xen/drivers/char/cadence-uart.c F: xen/drivers/char/exynos4210-uart.c diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/arm/Makefile +++ b/xen/arch/arm/Makefile @@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_TEE) += tee/ obj-$(CONFIG_HAS_VPCI) += vpci.o obj-$(CONFIG_HAS_ALTERNATIVE) += alternative.o -obj-y += bootfdt.init.o obj-y += cpuerrata.o obj-y += cpufeature.o obj-y += decode.o diff --git a/xen/common/device-tree/Makefile b/xen/common/device-tree/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/Makefile +++ b/xen/common/device-tree/Makefile @@ -1 +1,2 @@ +obj-y += bootfdt.init.o obj-y += bootinfo.o diff --git a/xen/arch/arm/bootfdt.c b/xen/common/device-tree/bootfdt.c similarity index 100% rename from xen/arch/arm/bootfdt.c rename to xen/common/device-tree/bootfdt.c -- 2.30.2
Enable usage of bootfdt for populating the boot info struct from the firmware-provided device tree. Also enable the Xen boot page allocator. Includes minor changes to bootfdt.c's boot_fdt_info() to tolerate the scenario in which the FDT overlaps a reserved memory region, as is the case on PPC when booted directly from skiboot. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/include/asm/setup.h | 5 +++++ xen/arch/ppc/setup.c | 21 ++++++++++++++++++++- xen/common/device-tree/bootfdt.c | 11 +++++++++-- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/xen/arch/ppc/include/asm/setup.h b/xen/arch/ppc/include/asm/setup.h index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/include/asm/setup.h +++ b/xen/arch/ppc/include/asm/setup.h @@ -XXX,XX +XXX,XX @@ const char *boot_module_kind_as_string(bootmodule_kind kind); struct bootcmdline *boot_cmdline_find_by_kind(bootmodule_kind kind); void populate_boot_allocator(void); +/* + * bootfdt.c + */ +size_t boot_fdt_info(const void *fdt, paddr_t paddr); + #endif /* __ASM_PPC_SETUP_H__ */ diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/setup.c +++ b/xen/arch/ppc/setup.c @@ -XXX,XX +XXX,XX @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include <xen/init.h> #include <xen/lib.h> +#include <xen/libfdt/libfdt.h> #include <xen/mm.h> #include <public/version.h> #include <asm/boot.h> #include <asm/early_printk.h> #include <asm/mm.h> #include <asm/processor.h> +#include <asm/setup.h> /* Xen stack for bringing up the first CPU. */ unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE); @@ -XXX,XX +XXX,XX @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { + void *boot_fdt; + struct bootmodule *xen_bootmodule; + if ( r5 ) { /* Unsupported OpenFirmware boot protocol */ @@ -XXX,XX +XXX,XX @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, else { /* kexec boot protocol */ - boot_opal_init((void *)r3); + boot_fdt = (void *)r3; + boot_opal_init(boot_fdt); } setup_exceptions(); + device_tree_flattened = boot_fdt; + boot_fdt_info(boot_fdt, r3); + + /* + * Xen relocates itself at the ppc64 entrypoint, so we need to manually mark + * the kernel module. + */ + xen_bootmodule = add_boot_module(BOOTMOD_XEN, __pa(_start), + PAGE_ALIGN(__pa(_end)), false); + BUG_ON(!xen_bootmodule); + + populate_boot_allocator(); + setup_initial_pagetables(); early_printk("Hello, ppc64le!\n"); diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) if ( ret < 0 ) panic("No valid device tree\n"); - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); - ret = device_tree_for_each_node(fdt, 0, early_scan_node, NULL); if ( ret ) panic("Early FDT parsing failed (%d)\n", ret); + /* + * Add module for the FDT itself after the device tree has been parsed. This + * is required on ppc64le where the device tree passed to Xen may have been + * allocated by skiboot, in which case it will exist within a reserved + * region and this call will fail. This is fine, however, since either way + * the allocator will know not to step on the device tree. + */ + add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); + /* * On Arm64 setup_directmap_mappings() expects to be called with the lowest * bank in memory first. There is no requirement that the DT will provide -- 2.30.2
Now that we have common code building, there's no need to keep the old itoa64+debug print function in mm-radix.c Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- Changes in v2: - Use CONFIG_DEBUG instead of NDEBUG xen/arch/ppc/mm-radix.c | 58 +++++++++-------------------------------- 1 file changed, 12 insertions(+), 46 deletions(-) diff --git a/xen/arch/ppc/mm-radix.c b/xen/arch/ppc/mm-radix.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/mm-radix.c +++ b/xen/arch/ppc/mm-radix.c @@ -XXX,XX +XXX,XX @@ void enable_mmu(void); +#ifdef CONFIG_DEBUG +#define radix_dprintk(msg, ...) printk(XENLOG_DEBUG msg, ## __VA_ARGS__) +#else +#define radix_dprintk(...) +#endif + #define INITIAL_LVL1_PD_COUNT 1 #define INITIAL_LVL2_LVL3_PD_COUNT 2 #define INITIAL_LVL4_PT_COUNT 256 @@ -XXX,XX +XXX,XX @@ static __init struct lvl4_pt *lvl4_pt_pool_alloc(void) return &initial_lvl4_pt_pool[initial_lvl4_pt_pool_used++]; } -#ifndef NDEBUG -/* TODO: Remove once we get common/ building */ -static char *__init itoa64_hex(uint64_t val, char *out_buf, size_t buf_len) -{ - uint64_t cur; - size_t i = buf_len - 1; - - /* Null terminate buffer */ - out_buf[i] = '\0'; - - /* Add digits in reverse */ - cur = val; - while ( cur && i > 0 ) - { - out_buf[--i] = "0123456789ABCDEF"[cur % 16]; - cur /= 16; - } - - /* Pad to 16 digits */ - while ( i > 0 ) - out_buf[--i] = '0'; - - return out_buf + i; -} -#endif - -static void __init radix_dprint(uint64_t addr, const char *msg) -{ -#ifndef NDEBUG - char buf[sizeof("DEADBEEFCAFEBABA")]; - char *addr_s = itoa64_hex(addr, buf, sizeof(buf)); - - early_printk("(0x"); - early_printk(addr_s); - early_printk(") "); - early_printk(msg); -#endif -} - static void __init setup_initial_mapping(struct lvl1_pd *lvl1, vaddr_t map_start, vaddr_t map_end, @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, unsigned long paddr = (page_addr - map_start) + phys_base; unsigned long flags; - radix_dprint(paddr, "being mapped to "); - radix_dprint(page_addr, "!\n"); + radix_dprintk("%016lx being mapped to %016lx\n", paddr, page_addr); if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) { - radix_dprint(page_addr, "being marked as TEXT (RX)\n"); + radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); flags = PTE_XEN_RX; } else if ( is_kernel_rodata(page_addr) ) { - radix_dprint(page_addr, "being marked as RODATA (RO)\n"); + radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); flags = PTE_XEN_RO; } else { - radix_dprint(page_addr, "being marked as DEFAULT (RW)\n"); + radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); flags = PTE_XEN_RW; } *pte = paddr_to_pte(paddr, flags); - radix_dprint(paddr_to_pte(paddr, flags).pte, - "is result of PTE map!\n"); + radix_dprintk("%016lx is the result of PTE map\n", + paddr_to_pte(paddr, flags).pte); } else { -- 2.30.2
In the initial mm-radix implementation, the in-memory partition and process tables required to configure the MMU, as well as the page tables themselves were all allocated statically since the boot allocator was not yet available. Now that it is, allocate these structures at runtime and bump the size of the Process Table to its maximum supported value (on POWER9). Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- Changes in v2: - Drop all static allocation and use boot allocator for page tables too xen/arch/ppc/mm-radix.c | 227 +++++++++++++++++++++------------------- 1 file changed, 119 insertions(+), 108 deletions(-) diff --git a/xen/arch/ppc/mm-radix.c b/xen/arch/ppc/mm-radix.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/mm-radix.c +++ b/xen/arch/ppc/mm-radix.c @@ -XXX,XX +XXX,XX @@ void enable_mmu(void); #define radix_dprintk(...) #endif -#define INITIAL_LVL1_PD_COUNT 1 -#define INITIAL_LVL2_LVL3_PD_COUNT 2 -#define INITIAL_LVL4_PT_COUNT 256 - -static size_t __initdata initial_lvl1_pd_pool_used; -static struct lvl1_pd initial_lvl1_pd_pool[INITIAL_LVL1_PD_COUNT]; - -static size_t __initdata initial_lvl2_lvl3_pd_pool_used; -static struct lvl2_pd initial_lvl2_lvl3_pd_pool[INITIAL_LVL2_LVL3_PD_COUNT]; - -static size_t __initdata initial_lvl4_pt_pool_used; -static struct lvl4_pt initial_lvl4_pt_pool[INITIAL_LVL4_PT_COUNT]; - -/* Only reserve minimum Partition and Process tables */ #define PATB_SIZE_LOG2 16 /* Only supported partition table size on POWER9 */ #define PATB_SIZE (1UL << PATB_SIZE_LOG2) -#define PRTB_SIZE_LOG2 12 +#define PRTB_SIZE_LOG2 24 /* Maximum process table size on POWER9 */ #define PRTB_SIZE (1UL << PRTB_SIZE_LOG2) -static struct patb_entry - __aligned(PATB_SIZE) initial_patb[PATB_SIZE / sizeof(struct patb_entry)]; +static struct patb_entry *initial_patb; +static struct prtb_entry *initial_prtb; -static struct prtb_entry - __aligned(PRTB_SIZE) initial_prtb[PRTB_SIZE / sizeof(struct prtb_entry)]; +static mfn_t __initdata min_alloc_mfn = {-1}; +static mfn_t __initdata max_alloc_mfn = {0}; -static __init struct lvl1_pd *lvl1_pd_pool_alloc(void) +/* + * A thin wrapper for alloc_boot_pages that keeps track of the maximum and + * minimum mfns that have been allocated. This information is used by + * setup_initial_mapping to include the allocated pages in the initial + * page mapping. + */ +static mfn_t __init initial_page_alloc(unsigned long nr_pfns, + unsigned long pfn_align) { - if ( initial_lvl1_pd_pool_used >= INITIAL_LVL1_PD_COUNT ) - { - early_printk("Ran out of space for LVL1 PD!\n"); - die(); - } + mfn_t mfn_first, mfn_last; - return &initial_lvl1_pd_pool[initial_lvl1_pd_pool_used++]; -} + mfn_first = alloc_boot_pages(nr_pfns, pfn_align); + mfn_last = _mfn(mfn_x(mfn_first) + nr_pfns - 1); -static __init struct lvl2_pd *lvl2_pd_pool_alloc(void) -{ - if ( initial_lvl2_lvl3_pd_pool_used >= INITIAL_LVL2_LVL3_PD_COUNT ) - { - early_printk("Ran out of space for LVL2/3 PD!\n"); - die(); - } + min_alloc_mfn = _mfn(min(mfn_x(min_alloc_mfn), mfn_x(mfn_first))); + max_alloc_mfn = _mfn(max(mfn_x(max_alloc_mfn), mfn_x(mfn_last))); - return &initial_lvl2_lvl3_pd_pool[initial_lvl2_lvl3_pd_pool_used++]; + return mfn_first; } -static __init struct lvl3_pd *lvl3_pd_pool_alloc(void) +static __init void *initial_pd_pt_alloc(void) { - BUILD_BUG_ON(sizeof(struct lvl3_pd) != sizeof(struct lvl2_pd)); + BUILD_BUG_ON(sizeof(struct lvl1_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl2_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl3_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl4_pt) > PAGE_SIZE); - return (struct lvl3_pd *) lvl2_pd_pool_alloc(); + return __va(mfn_to_maddr(initial_page_alloc(1, 1))); } -static __init struct lvl4_pt *lvl4_pt_pool_alloc(void) +static void map_page_initial(struct lvl1_pd *lvl1, vaddr_t virt, paddr_t phys, + unsigned long flags) { - if ( initial_lvl4_pt_pool_used >= INITIAL_LVL4_PT_COUNT ) + struct lvl2_pd *lvl2; + struct lvl3_pd *lvl3; + struct lvl4_pt *lvl4; + pde_t *pde; + pte_t *pte; + + /* Allocate LVL 2 PD if necessary */ + pde = pt_entry(lvl1, virt); + if ( !pde_is_valid(*pde) ) { - early_printk("Ran out of space for LVL4 PT!\n"); - die(); + lvl2 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl2), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_2); + } + else + lvl2 = __va(pde_to_paddr(*pde)); + + /* Allocate LVL 3 PD if necessary */ + pde = pt_entry(lvl2, virt); + if ( !pde_is_valid(*pde) ) + { + lvl3 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl3), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_3); + } + else + lvl3 = __va(pde_to_paddr(*pde)); + + /* Allocate LVL 4 PT if necessary */ + pde = pt_entry(lvl3, virt); + if ( !pde_is_valid(*pde) ) + { + lvl4 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl4), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_4); } + else + lvl4 = __va(pde_to_paddr(*pde)); - return &initial_lvl4_pt_pool[initial_lvl4_pt_pool_used++]; + /* Finally, create PTE in LVL 4 PT */ + pte = pt_entry(lvl4, virt); + if ( !pte_is_valid(*pte) ) + { + radix_dprintk("%016lx being mapped to %016lx\n", phys, virt); + *pte = paddr_to_pte(phys, flags); + } + else + { + early_printk("BUG: Tried to create PTE for already-mapped page!"); + die(); + } } static void __init setup_initial_mapping(struct lvl1_pd *lvl1, @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, paddr_t phys_base) { uint64_t page_addr; + mfn_t previous_max_alloc_mfn; if ( map_start & ~PAGE_MASK ) { @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, die(); } + /* Identity map Xen itself */ for ( page_addr = map_start; page_addr < map_end; page_addr += PAGE_SIZE ) { - struct lvl2_pd *lvl2; - struct lvl3_pd *lvl3; - struct lvl4_pt *lvl4; - pde_t *pde; - pte_t *pte; - - /* Allocate LVL 2 PD if necessary */ - pde = pt_entry(lvl1, page_addr); - if ( !pde_is_valid(*pde) ) - { - lvl2 = lvl2_pd_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl2), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_2); - } - else - lvl2 = __va(pde_to_paddr(*pde)); + unsigned long flags; - /* Allocate LVL 3 PD if necessary */ - pde = pt_entry(lvl2, page_addr); - if ( !pde_is_valid(*pde) ) + if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) { - lvl3 = lvl3_pd_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl3), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_3); + radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); + flags = PTE_XEN_RX; } - else - lvl3 = __va(pde_to_paddr(*pde)); - - /* Allocate LVL 4 PT if necessary */ - pde = pt_entry(lvl3, page_addr); - if ( !pde_is_valid(*pde) ) + else if ( is_kernel_rodata(page_addr) ) { - lvl4 = lvl4_pt_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl4), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_4); + radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); + flags = PTE_XEN_RO; } else - lvl4 = __va(pde_to_paddr(*pde)); - - /* Finally, create PTE in LVL 4 PT */ - pte = pt_entry(lvl4, page_addr); - if ( !pte_is_valid(*pte) ) { - unsigned long paddr = (page_addr - map_start) + phys_base; - unsigned long flags; - - radix_dprintk("%016lx being mapped to %016lx\n", paddr, page_addr); - if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) - { - radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); - flags = PTE_XEN_RX; - } - else if ( is_kernel_rodata(page_addr) ) - { - radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); - flags = PTE_XEN_RO; - } - else - { - radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); - flags = PTE_XEN_RW; - } - - *pte = paddr_to_pte(paddr, flags); - radix_dprintk("%016lx is the result of PTE map\n", - paddr_to_pte(paddr, flags).pte); - } - else - { - early_printk("BUG: Tried to create PTE for already-mapped page!"); - die(); + radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); + flags = PTE_XEN_RW; } + + map_page_initial(lvl1, page_addr, (page_addr - map_start) + phys_base, flags); + } + + previous_max_alloc_mfn = max_alloc_mfn; + + /* + * Identity map all pages we've allocated for paging structures. This act + * itself will allocate more pages, so continue until we've mapped from + * `max_alloc_mfn` down to `min_alloc_mfn`. This assumes that the heap grows + * downwards, which matches the behavior of alloc_boot_pages. + */ + for ( page_addr = (vaddr_t)__va(mfn_to_maddr(max_alloc_mfn)); + mfn_to_maddr(min_alloc_mfn) <= __pa(page_addr); + page_addr -= PAGE_SIZE) + { + map_page_initial(lvl1, page_addr, __pa(page_addr), PTE_XEN_RW); } + + if ( mfn_x(previous_max_alloc_mfn) != mfn_x(max_alloc_mfn) ) + panic("Early page heap unexpectedly grew upwards\n"); } static void __init setup_partition_table(struct lvl1_pd *root) @@ -XXX,XX +XXX,XX @@ static void __init setup_process_table(struct lvl1_pd *root) void __init setup_initial_pagetables(void) { - struct lvl1_pd *root = lvl1_pd_pool_alloc(); + struct lvl1_pd *root; unsigned long lpcr; + mfn_t patb_mfn, prtb_mfn; + + /* Allocate mfns for in-memory tables using the boot allocator */ + prtb_mfn = initial_page_alloc(PRTB_SIZE / PAGE_SIZE, + 1 << (PRTB_SIZE_LOG2 - PAGE_SHIFT)); + patb_mfn = initial_page_alloc(PATB_SIZE / PAGE_SIZE, + 1 << (PATB_SIZE_LOG2 - PAGE_SHIFT)); + + initial_patb = __va(mfn_to_maddr(patb_mfn)); + initial_prtb = __va(mfn_to_maddr(prtb_mfn)); + /* Allocate and create page tables */ + root = initial_pd_pt_alloc(); setup_initial_mapping(root, (vaddr_t)_start, (vaddr_t)_end, __pa(_start)); /* Enable Radix mode in LPCR */ -- 2.30.2
Hello all, This series enables the Xen boot time allocator on Power by parsing the available memory regions from the firmware-provided device tree. Thanks to Oleksii's work on my patches to move ARM's bootfdt code to common, v5 is much smaller. One new patch is included to fix some newly-broken behavior in bootfdt.c regarding handling of DT reserved memory maps. Thanks, Shawn Shawn Anastasio (3): xen/device-tree: Let DT reserve map entries overlap reserved-memory xen/ppc: Enable bootfdt and boot allocator xen/ppc: mm-radix: Allocate all paging structures at runtime xen/arch/ppc/mm-radix.c | 238 ++++++++++++++++-------------- xen/arch/ppc/setup.c | 20 ++- xen/common/device-tree/bootfdt.c | 37 ++++- xen/common/device-tree/bootinfo.c | 11 +- xen/include/xen/bootfdt.h | 3 +- 5 files changed, 191 insertions(+), 118 deletions(-) -- 2.30.2
Commit 53dc37829c31 ("xen/arm: Add DT reserve map regions to bootinfo.reserved_mem") changes the way reserve map regions are tracked, and as a result broke bootfdt's ability to handle device trees in which the reserve map and the `reserved-memory` node contain the same entries as each other, as is the case on PPC when booted by skiboot. Fix this behavior by moving the reserve map check to after the DT has been parsed and by explicitly allowing overlap with entries created by `reserved-memory` nodes. Fixes: 53dc37829c31 ("xen/arm: Add DT reserve map regions to bootinfo.reserved_mem") Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/common/device-tree/bootfdt.c | 28 +++++++++++++++++++++++----- xen/common/device-tree/bootinfo.c | 11 +++++++++-- xen/include/xen/bootfdt.h | 3 ++- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -XXX,XX +XXX,XX @@ static int __init device_tree_get_meminfo(const void *fdt, int node, { device_tree_get_reg(&cell, address_cells, size_cells, &start, &size); if ( mem == bootinfo_get_reserved_mem() && - check_reserved_regions_overlap(start, size) ) + check_reserved_regions_overlap(start, size, NULL) ) return -EINVAL; /* Some DT may describe empty bank, ignore them */ if ( !size ) @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) if ( nr_rsvd < 0 ) panic("Parsing FDT memory reserve map failed (%d)\n", nr_rsvd); + ret = device_tree_for_each_node(fdt, 0, early_scan_node, NULL); + if ( ret ) + panic("Early FDT parsing failed (%d)\n", ret); + for ( i = 0; i < nr_rsvd; i++ ) { + const struct membanks *overlap = NULL; struct membank *bank; paddr_t s, sz; if ( fdt_get_mem_rsv_paddr(device_tree_flattened, i, &s, &sz) < 0 ) continue; + if ( check_reserved_regions_overlap(s, sz, &overlap) ) + { + if ( overlap == bootinfo_get_reserved_mem() ) + { + /* + * Some valid device trees, such as those generated by OpenPOWER + * skiboot firmware, expose all reserved memory regions in the + * FDT memory reservation block (here) AND in the + * reserved-memory node which has already been parsed. Thus, any + * overlaps in the mem_reserved banks should be ignored. + */ + continue; + } + else + panic("FDT reserve map overlapped with membanks/modules\n"); + } + if ( reserved_mem->nr_banks < reserved_mem->max_banks ) { bank = &reserved_mem->bank[reserved_mem->nr_banks]; @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) panic("Cannot allocate reserved memory bank\n"); } - ret = device_tree_for_each_node(fdt, 0, early_scan_node, NULL); - if ( ret ) - panic("Early FDT parsing failed (%d)\n", ret); - /* * On Arm64 setup_directmap_mappings() expects to be called with the lowest * bank in memory first. There is no requirement that the DT will provide diff --git a/xen/common/device-tree/bootinfo.c b/xen/common/device-tree/bootinfo.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootinfo.c +++ b/xen/common/device-tree/bootinfo.c @@ -XXX,XX +XXX,XX @@ void __init fw_unreserved_regions(paddr_t s, paddr_t e, * existing reserved memory regions, otherwise false. */ bool __init check_reserved_regions_overlap(paddr_t region_start, - paddr_t region_size) + paddr_t region_size, + const struct membanks **out_overlapping_membanks) { const struct membanks *mem_banks[] = { bootinfo_get_reserved_mem(), @@ -XXX,XX +XXX,XX @@ bool __init check_reserved_regions_overlap(paddr_t region_start, * shared memory banks (when static shared memory feature is enabled) */ for ( i = 0; i < ARRAY_SIZE(mem_banks); i++ ) + { if ( meminfo_overlap_check(mem_banks[i], region_start, region_size) ) + { + if ( out_overlapping_membanks ) + *out_overlapping_membanks = mem_banks[i]; return true; + } + } /* Check if input region is overlapping with bootmodules */ if ( bootmodules_overlap_check(&bootinfo.modules, @@ -XXX,XX +XXX,XX @@ struct bootmodule __init *add_boot_module(bootmodule_kind kind, return NULL; } - if ( check_reserved_regions_overlap(start, size) ) + if ( check_reserved_regions_overlap(start, size, NULL) ) return NULL; for ( i = 0 ; i < mods->nr_mods ; i++ ) diff --git a/xen/include/xen/bootfdt.h b/xen/include/xen/bootfdt.h index XXXXXXX..XXXXXXX 100644 --- a/xen/include/xen/bootfdt.h +++ b/xen/include/xen/bootfdt.h @@ -XXX,XX +XXX,XX @@ struct bootinfo { extern struct bootinfo bootinfo; bool check_reserved_regions_overlap(paddr_t region_start, - paddr_t region_size); + paddr_t region_size, + const struct membanks **out_overlapping_membanks); struct bootmodule *add_boot_module(bootmodule_kind kind, paddr_t start, paddr_t size, bool domU); -- 2.30.2
Enable usage of bootfdt for populating the boot info struct from the firmware-provided device tree. Also enable the Xen boot page allocator. Additionally, modify bootfdt.c's boot_fdt_info() to tolerate the scenario in which the FDT overlaps a reserved memory region, as is the case on PPC when booted directly from skiboot. Since this means that Xen can now boot without a BOOTMOD_FDT present in bootinfo, clarify this fact in a comment above BOOTMOD_FDT's definition. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Acked-by: Julien Grall <jgrall@amazon.com> --- Changes in v5: - Drop setup.c's unnecessary `boot_fdt` variable per Julien's suggestion xen/arch/ppc/setup.c | 20 +++++++++++++++++++- xen/common/device-tree/bootfdt.c | 11 +++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/setup.c +++ b/xen/arch/ppc/setup.c @@ -XXX,XX +XXX,XX @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <xen/bootfdt.h> +#include <xen/device_tree.h> #include <xen/init.h> #include <xen/lib.h> #include <xen/mm.h> @@ -XXX,XX +XXX,XX @@ #include <asm/early_printk.h> #include <asm/mm.h> #include <asm/processor.h> +#include <asm/setup.h> /* Xen stack for bringing up the first CPU. */ unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE); @@ -XXX,XX +XXX,XX @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { + const struct bootmodule *xen_bootmodule; + if ( r5 ) { /* Unsupported OpenFirmware boot protocol */ @@ -XXX,XX +XXX,XX @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, else { /* kexec boot protocol */ - boot_opal_init((void *)r3); + device_tree_flattened = (void *)r3; + boot_opal_init(device_tree_flattened); } setup_exceptions(); + boot_fdt_info(device_tree_flattened, r3); + + /* + * Xen relocates itself at the ppc64 entrypoint, so we need to manually mark + * the kernel module. + */ + xen_bootmodule = add_boot_module(BOOTMOD_XEN, __pa(_start), + PAGE_ALIGN(__pa(_end)), false); + BUG_ON(!xen_bootmodule); + + populate_boot_allocator(); + setup_initial_pagetables(); init_constructors(); diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) if ( ret < 0 ) panic("No valid device tree\n"); - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); - nr_rsvd = fdt_num_mem_rsv(fdt); if ( nr_rsvd < 0 ) panic("Parsing FDT memory reserve map failed (%d)\n", nr_rsvd); @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) panic("Cannot allocate reserved memory bank\n"); } + /* + * Add module for the FDT itself after the device tree has been parsed. This + * is required on ppc64le where the device tree passed to Xen may have been + * allocated by skiboot, in which case it will exist within a reserved + * region and this call will fail. This is fine, however, since either way + * the allocator will know not to step on the device tree. + */ + (void)add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); + /* * On Arm64 setup_directmap_mappings() expects to be called with the lowest * bank in memory first. There is no requirement that the DT will provide -- 2.30.2
In the initial mm-radix implementation, the in-memory partition and process tables required to configure the MMU, as well as the page tables themselves were all allocated statically since the boot allocator was not yet available. Now that it is, allocate these structures at runtime and bump the size of the Process Table to its maximum supported value (on POWER9). Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- Changes in v5: - Add more clarification comments to min/max mfn variable decls. Changes in v4: - use mfn_add in initial_page_alloc() - zero pages returned by initial_page_alloc() xen/arch/ppc/mm-radix.c | 238 ++++++++++++++++++++++------------------ 1 file changed, 130 insertions(+), 108 deletions(-) diff --git a/xen/arch/ppc/mm-radix.c b/xen/arch/ppc/mm-radix.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/mm-radix.c +++ b/xen/arch/ppc/mm-radix.c @@ -XXX,XX +XXX,XX @@ void enable_mmu(void); #define radix_dprintk(...) #endif -#define INITIAL_LVL1_PD_COUNT 1 -#define INITIAL_LVL2_LVL3_PD_COUNT 2 -#define INITIAL_LVL4_PT_COUNT 256 - -static size_t __initdata initial_lvl1_pd_pool_used; -static struct lvl1_pd initial_lvl1_pd_pool[INITIAL_LVL1_PD_COUNT]; - -static size_t __initdata initial_lvl2_lvl3_pd_pool_used; -static struct lvl2_pd initial_lvl2_lvl3_pd_pool[INITIAL_LVL2_LVL3_PD_COUNT]; - -static size_t __initdata initial_lvl4_pt_pool_used; -static struct lvl4_pt initial_lvl4_pt_pool[INITIAL_LVL4_PT_COUNT]; - -/* Only reserve minimum Partition and Process tables */ #define PATB_SIZE_LOG2 16 /* Only supported partition table size on POWER9 */ #define PATB_SIZE (1UL << PATB_SIZE_LOG2) -#define PRTB_SIZE_LOG2 12 +#define PRTB_SIZE_LOG2 24 /* Maximum process table size on POWER9 */ #define PRTB_SIZE (1UL << PRTB_SIZE_LOG2) -static struct patb_entry - __aligned(PATB_SIZE) initial_patb[PATB_SIZE / sizeof(struct patb_entry)]; +static struct patb_entry *initial_patb; +static struct prtb_entry *initial_prtb; -static struct prtb_entry - __aligned(PRTB_SIZE) initial_prtb[PRTB_SIZE / sizeof(struct prtb_entry)]; +/* + * The highest and lowest mfns returned by initial_page_alloc. In order to + * bootstrap the xen's initial page tables, the mfns allocated to store the + * paging structurs needs to be tracked so that they can be included in the + * mapping. This is most simply accomplished by keeping track of the minimum + * and maximum mfn numbers so that the whole range from min->max can be mapped. + */ +static mfn_t __initdata min_alloc_mfn = {-1}; +static mfn_t __initdata max_alloc_mfn = {0}; -static __init struct lvl1_pd *lvl1_pd_pool_alloc(void) +/* + * A thin wrapper for alloc_boot_pages that keeps track of the maximum and + * minimum mfns that have been allocated. This information is used by + * setup_initial_mapping to include the allocated pages in the initial + * page mapping. + * + * Additionally, allocated pages are zeroed before return. + */ +static mfn_t __init initial_page_alloc(unsigned long nr_pfns, + unsigned long pfn_align) { - if ( initial_lvl1_pd_pool_used >= INITIAL_LVL1_PD_COUNT ) - { - early_printk("Ran out of space for LVL1 PD!\n"); - die(); - } + mfn_t mfn_first, mfn_last; - return &initial_lvl1_pd_pool[initial_lvl1_pd_pool_used++]; -} + mfn_first = alloc_boot_pages(nr_pfns, pfn_align); + mfn_last = mfn_add(mfn_first, nr_pfns - 1); -static __init struct lvl2_pd *lvl2_pd_pool_alloc(void) -{ - if ( initial_lvl2_lvl3_pd_pool_used >= INITIAL_LVL2_LVL3_PD_COUNT ) - { - early_printk("Ran out of space for LVL2/3 PD!\n"); - die(); - } + min_alloc_mfn = _mfn(min(mfn_x(min_alloc_mfn), mfn_x(mfn_first))); + max_alloc_mfn = _mfn(max(mfn_x(max_alloc_mfn), mfn_x(mfn_last))); + + memset(__va(mfn_to_maddr(mfn_first)), 0, nr_pfns << PAGE_SHIFT); - return &initial_lvl2_lvl3_pd_pool[initial_lvl2_lvl3_pd_pool_used++]; + return mfn_first; } -static __init struct lvl3_pd *lvl3_pd_pool_alloc(void) +static __init void *initial_pd_pt_alloc(void) { - BUILD_BUG_ON(sizeof(struct lvl3_pd) != sizeof(struct lvl2_pd)); + BUILD_BUG_ON(sizeof(struct lvl1_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl2_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl3_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl4_pt) > PAGE_SIZE); - return (struct lvl3_pd *) lvl2_pd_pool_alloc(); + return __va(mfn_to_maddr(initial_page_alloc(1, 1))); } -static __init struct lvl4_pt *lvl4_pt_pool_alloc(void) +static void map_page_initial(struct lvl1_pd *lvl1, vaddr_t virt, paddr_t phys, + unsigned long flags) { - if ( initial_lvl4_pt_pool_used >= INITIAL_LVL4_PT_COUNT ) + struct lvl2_pd *lvl2; + struct lvl3_pd *lvl3; + struct lvl4_pt *lvl4; + pde_t *pde; + pte_t *pte; + + /* Allocate LVL 2 PD if necessary */ + pde = pt_entry(lvl1, virt); + if ( !pde_is_valid(*pde) ) { - early_printk("Ran out of space for LVL4 PT!\n"); - die(); + lvl2 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl2), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_2); } + else + lvl2 = __va(pde_to_paddr(*pde)); - return &initial_lvl4_pt_pool[initial_lvl4_pt_pool_used++]; + /* Allocate LVL 3 PD if necessary */ + pde = pt_entry(lvl2, virt); + if ( !pde_is_valid(*pde) ) + { + lvl3 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl3), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_3); + } + else + lvl3 = __va(pde_to_paddr(*pde)); + + /* Allocate LVL 4 PT if necessary */ + pde = pt_entry(lvl3, virt); + if ( !pde_is_valid(*pde) ) + { + lvl4 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl4), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_4); + } + else + lvl4 = __va(pde_to_paddr(*pde)); + + /* Finally, create PTE in LVL 4 PT */ + pte = pt_entry(lvl4, virt); + if ( !pte_is_valid(*pte) ) + { + radix_dprintk("%016lx being mapped to %016lx\n", phys, virt); + *pte = paddr_to_pte(phys, flags); + } + else + { + early_printk("BUG: Tried to create PTE for already-mapped page!"); + die(); + } } static void __init setup_initial_mapping(struct lvl1_pd *lvl1, @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, paddr_t phys_base) { uint64_t page_addr; + mfn_t previous_max_alloc_mfn; if ( map_start & ~PAGE_MASK ) { @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, die(); } + /* Identity map Xen itself */ for ( page_addr = map_start; page_addr < map_end; page_addr += PAGE_SIZE ) { - struct lvl2_pd *lvl2; - struct lvl3_pd *lvl3; - struct lvl4_pt *lvl4; - pde_t *pde; - pte_t *pte; - - /* Allocate LVL 2 PD if necessary */ - pde = pt_entry(lvl1, page_addr); - if ( !pde_is_valid(*pde) ) - { - lvl2 = lvl2_pd_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl2), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_2); - } - else - lvl2 = __va(pde_to_paddr(*pde)); - - /* Allocate LVL 3 PD if necessary */ - pde = pt_entry(lvl2, page_addr); - if ( !pde_is_valid(*pde) ) - { - lvl3 = lvl3_pd_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl3), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_3); - } - else - lvl3 = __va(pde_to_paddr(*pde)); + unsigned long flags; - /* Allocate LVL 4 PT if necessary */ - pde = pt_entry(lvl3, page_addr); - if ( !pde_is_valid(*pde) ) + if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) { - lvl4 = lvl4_pt_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl4), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_4); + radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); + flags = PTE_XEN_RX; } - else - lvl4 = __va(pde_to_paddr(*pde)); - - /* Finally, create PTE in LVL 4 PT */ - pte = pt_entry(lvl4, page_addr); - if ( !pte_is_valid(*pte) ) + else if ( is_kernel_rodata(page_addr) ) { - unsigned long paddr = (page_addr - map_start) + phys_base; - unsigned long flags; - - radix_dprintk("%016lx being mapped to %016lx\n", paddr, page_addr); - if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) - { - radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); - flags = PTE_XEN_RX; - } - else if ( is_kernel_rodata(page_addr) ) - { - radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); - flags = PTE_XEN_RO; - } - else - { - radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); - flags = PTE_XEN_RW; - } - - *pte = paddr_to_pte(paddr, flags); - radix_dprintk("%016lx is the result of PTE map\n", - paddr_to_pte(paddr, flags).pte); + radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); + flags = PTE_XEN_RO; } else { - early_printk("BUG: Tried to create PTE for already-mapped page!"); - die(); + radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); + flags = PTE_XEN_RW; } + + map_page_initial(lvl1, page_addr, (page_addr - map_start) + phys_base, flags); } + + previous_max_alloc_mfn = max_alloc_mfn; + + /* + * Identity map all pages we've allocated for paging structures. This act + * itself will allocate more pages, so continue until we've mapped from + * `max_alloc_mfn` down to `min_alloc_mfn`. This assumes that the heap grows + * downwards, which matches the behavior of alloc_boot_pages. + */ + for ( page_addr = (vaddr_t)__va(mfn_to_maddr(max_alloc_mfn)); + mfn_to_maddr(min_alloc_mfn) <= __pa(page_addr); + page_addr -= PAGE_SIZE) + { + map_page_initial(lvl1, page_addr, __pa(page_addr), PTE_XEN_RW); + } + + if ( mfn_x(previous_max_alloc_mfn) != mfn_x(max_alloc_mfn) ) + panic("Early page heap unexpectedly grew upwards\n"); } static void __init setup_partition_table(struct lvl1_pd *root) @@ -XXX,XX +XXX,XX @@ static void __init setup_process_table(struct lvl1_pd *root) void __init setup_initial_pagetables(void) { - struct lvl1_pd *root = lvl1_pd_pool_alloc(); + struct lvl1_pd *root; unsigned long lpcr; + mfn_t patb_mfn, prtb_mfn; + + /* Allocate mfns for in-memory tables using the boot allocator */ + prtb_mfn = initial_page_alloc(PRTB_SIZE / PAGE_SIZE, + 1 << (PRTB_SIZE_LOG2 - PAGE_SHIFT)); + patb_mfn = initial_page_alloc(PATB_SIZE / PAGE_SIZE, + 1 << (PATB_SIZE_LOG2 - PAGE_SHIFT)); + + initial_patb = __va(mfn_to_maddr(patb_mfn)); + initial_prtb = __va(mfn_to_maddr(prtb_mfn)); + /* Allocate and create page tables */ + root = initial_pd_pt_alloc(); setup_initial_mapping(root, (vaddr_t)_start, (vaddr_t)_end, __pa(_start)); /* Enable Radix mode in LPCR */ -- 2.30.2