:p
atchew
Login
Hello all, This series enables the Xen boot time allocator on Power by parsing the available memory regions from the firmware-provided device tree. As suggested during review of v1, this is now accomplished by moving Arm's bootfdt.c into xen/common, along with a few modifications to adapt it to Power -- namely to tolerate a device tree with no memory reserve map entries and to tolerate an FDT that overlaps with a reserved memory region. Additionally, the final patch of the series updates the radix MMU code to use the newly-enabled boot allocator for the Partition and Process tables, instead of statically allocating them as was previously done. Among other things, switching to run-time allocation allows us to allocate a full-sized Process Table instead of the minimal one that was previously used to keep the Xen binary size small. Thanks, Shawn Shawn Anastasio (7): xen/asm-generic: Introduce generic static-shmem.h xen/asm-generic: Introduce generic setup.h xen/common: Move Arm's bootfdt to common xen/device-tree: Fix bootfdt.c to tolerate 0 reserved regions xen/ppc: Enable bootfdt and boot allocator xen/ppc: mm-radix: Replace debug printing code with printk xen/ppc: mm-radix: Allocate Partition and Process Tables at runtime xen/arch/arm/Makefile | 1 - xen/arch/ppc/include/asm/Makefile | 1 + xen/arch/ppc/include/asm/setup.h | 117 +++++++ xen/arch/ppc/mm-radix.c | 199 ++++++------ xen/arch/ppc/setup.c | 289 +++++++++++++++++- xen/common/Makefile | 1 + xen/common/device-tree/Makefile | 1 + .../arm => common/device-tree}/bootfdt.c | 45 ++- xen/include/asm-generic/setup.h | 148 +++++++++ xen/include/asm-generic/static-shmem.h | 12 + 10 files changed, 698 insertions(+), 116 deletions(-) create mode 100644 xen/common/device-tree/Makefile rename xen/{arch/arm => common/device-tree}/bootfdt.c (93%) create mode 100644 xen/include/asm-generic/setup.h create mode 100644 xen/include/asm-generic/static-shmem.h -- 2.30.2
Introduce static-shmem.h to asm-generic as a prerequisite for moving ARM's bootfdt.c into xen/common. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/include/asm/Makefile | 1 + xen/include/asm-generic/static-shmem.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 xen/include/asm-generic/static-shmem.h diff --git a/xen/arch/ppc/include/asm/Makefile b/xen/arch/ppc/include/asm/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/include/asm/Makefile +++ b/xen/arch/ppc/include/asm/Makefile @@ -XXX,XX +XXX,XX @@ generic-y += iocap.h generic-y += paging.h generic-y += percpu.h generic-y += random.h +generic-y += static-shmem.h generic-y += vm_event.h diff --git a/xen/include/asm-generic/static-shmem.h b/xen/include/asm-generic/static-shmem.h new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/include/asm-generic/static-shmem.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_GENERIC_STATIC_SHMEM_H__ +#define __ASM_GENERIC_STATIC_SHMEM_H__ + +static inline int process_shm_node(const void *fdt, int node, + uint32_t address_cells, uint32_t size_cells) +{ + return -EINVAL; +} + +#endif /* __ASM_GENERIC_STATIC_SHMEM_H__ */ -- 2.30.2
Introduce a generic setup.h in asm-generic, based on ARM's, that defines all the stubs necessary to compile bootfdt.c. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/include/asm/Makefile | 1 + xen/arch/ppc/include/asm/setup.h | 6 -- xen/include/asm-generic/setup.h | 148 ++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 6 deletions(-) delete mode 100644 xen/arch/ppc/include/asm/setup.h create mode 100644 xen/include/asm-generic/setup.h diff --git a/xen/arch/ppc/include/asm/Makefile b/xen/arch/ppc/include/asm/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/include/asm/Makefile +++ b/xen/arch/ppc/include/asm/Makefile @@ -XXX,XX +XXX,XX @@ generic-y += iocap.h generic-y += paging.h generic-y += percpu.h generic-y += random.h +generic-y += setup.h generic-y += static-shmem.h generic-y += vm_event.h diff --git a/xen/arch/ppc/include/asm/setup.h b/xen/arch/ppc/include/asm/setup.h deleted file mode 100644 index XXXXXXX..XXXXXXX --- a/xen/arch/ppc/include/asm/setup.h +++ /dev/null @@ -XXX,XX +XXX,XX @@ -#ifndef __ASM_PPC_SETUP_H__ -#define __ASM_PPC_SETUP_H__ - -#define max_init_domid (0) - -#endif /* __ASM_PPC_SETUP_H__ */ diff --git a/xen/include/asm-generic/setup.h b/xen/include/asm-generic/setup.h new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/include/asm-generic/setup.h @@ -XXX,XX +XXX,XX @@ +#ifndef __ASM_GENERIC_SETUP_H__ +#define __ASM_GENERIC_SETUP_H__ + +#define max_init_domid (0) + +#include <xen/bug.h> + +#include <public/version.h> +#include <asm/p2m.h> +#include <xen/device_tree.h> + +#define MIN_FDT_ALIGN 8 +#define MAX_FDT_SIZE SZ_2M + +#define NR_MEM_BANKS 256 + +#define MAX_MODULES 32 /* Current maximum useful modules */ + +typedef enum { + BOOTMOD_XEN, + BOOTMOD_FDT, + BOOTMOD_KERNEL, + BOOTMOD_RAMDISK, + BOOTMOD_XSM, + BOOTMOD_GUEST_DTB, + BOOTMOD_UNKNOWN +} bootmodule_kind; + +enum membank_type { + /* + * The MEMBANK_DEFAULT type refers to either reserved memory for the + * 
device/firmware (when the bank is in 'reserved_mem') or any RAM (when + * the bank is in 'mem'). + */ + MEMBANK_DEFAULT, + /* + * The MEMBANK_STATIC_DOMAIN type is used to indicate whether the memory + * bank is bound to a static Xen domain. It is only valid when the bank + * is in reserved_mem. + */ + MEMBANK_STATIC_DOMAIN, + /* + * The MEMBANK_STATIC_HEAP type is used to indicate whether the memory + * bank is reserved as static heap. It is only valid when the bank is + * in reserved_mem. + */ + MEMBANK_STATIC_HEAP, +}; + +/* Indicates the maximum number of characters(\0 included) for shm_id */ +#define MAX_SHM_ID_LENGTH 16 + +struct membank { + paddr_t start; + paddr_t size; + enum membank_type type; +}; + +struct meminfo { + unsigned int nr_banks; + struct membank bank[NR_MEM_BANKS]; +}; + +/* + * The domU flag is set for kernels and ramdisks of "xen,domain" nodes. + * The purpose of the domU flag is to avoid getting confused in + * kernel_probe, where we try to guess which is the dom0 kernel and + * initrd to be compatible with all versions of the multiboot spec. 
+ */ +#define BOOTMOD_MAX_CMDLINE 1024 +struct bootmodule { + bootmodule_kind kind; + bool domU; + paddr_t start; + paddr_t size; +}; + +/* DT_MAX_NAME is the node name max length according the DT spec */ +#define DT_MAX_NAME 41 +struct bootcmdline { + bootmodule_kind kind; + bool domU; + paddr_t start; + char dt_name[DT_MAX_NAME]; + char cmdline[BOOTMOD_MAX_CMDLINE]; +}; + +struct bootmodules { + int nr_mods; + struct bootmodule module[MAX_MODULES]; +}; + +struct bootcmdlines { + unsigned int nr_mods; + struct bootcmdline cmdline[MAX_MODULES]; +}; + +struct bootinfo { + struct meminfo mem; + struct meminfo reserved_mem; + struct bootmodules modules; + struct bootcmdlines cmdlines; + bool static_heap; +}; + +extern struct bootinfo bootinfo; + +/* + * setup.c + */ + +static inline bool check_reserved_regions_overlap(paddr_t region_start, + paddr_t region_size) +{ + /* Not implemented on GENERIC */ + BUG(); +} + +static inline struct bootmodule *add_boot_module(bootmodule_kind kind, + paddr_t start, paddr_t size, + bool domU) +{ + /* Not implemented on GENERIC */ + BUG(); +} + +static inline void add_boot_cmdline(const char *name, const char *cmdline, + bootmodule_kind kind, paddr_t start, + bool domU) +{ + /* Not implemented on GENERIC */ + BUG(); +} + +static inline const char *boot_module_kind_as_string(bootmodule_kind kind) +{ + /* Not implemented on GENERIC */ + BUG(); +} + +static inline struct bootcmdline *boot_cmdline_find_by_kind( + bootmodule_kind kind) +{ + /* Not implemented on GENERIC */ + BUG(); +} + +#endif /* __ASM_GENERIC_SETUP_H__ */ -- 2.30.2
Move Arm's bootfdt.c to xen/common so that it can be used by other device tree architectures like PPC and RISCV. Only a minor change to conditionalize a call to a function only available on EFI-supporting targets was made to the code itself. Suggested-by: Julien Grall <julien@xen.org> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/arm/Makefile | 1 - xen/common/Makefile | 1 + xen/common/device-tree/Makefile | 1 + xen/{arch/arm => common/device-tree}/bootfdt.c | 15 +++++++++------ 4 files changed, 11 insertions(+), 7 deletions(-) create mode 100644 xen/common/device-tree/Makefile rename xen/{arch/arm => common/device-tree}/bootfdt.c (98%) diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/arm/Makefile +++ b/xen/arch/arm/Makefile @@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_TEE) += tee/ obj-$(CONFIG_HAS_VPCI) += vpci.o obj-$(CONFIG_HAS_ALTERNATIVE) += alternative.o -obj-y += bootfdt.init.o obj-y += cpuerrata.o obj-y += cpufeature.o obj-y += decode.o diff --git a/xen/common/Makefile b/xen/common/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -XXX,XX +XXX,XX @@ obj-$(CONFIG_UBSAN) += ubsan/ obj-$(CONFIG_NEEDS_LIBELF) += libelf/ obj-$(CONFIG_HAS_DEVICE_TREE) += libfdt/ +obj-$(CONFIG_HAS_DEVICE_TREE) += device-tree/ CONF_FILE := $(if $(patsubst /%,,$(KCONFIG_CONFIG)),$(objtree)/)$(KCONFIG_CONFIG) $(obj)/config.gz: $(CONF_FILE) diff --git a/xen/common/device-tree/Makefile b/xen/common/device-tree/Makefile new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/common/device-tree/Makefile @@ -0,0 +1 @@ +obj-y += bootfdt.init.o diff --git a/xen/arch/arm/bootfdt.c b/xen/common/device-tree/bootfdt.c similarity index 98% rename from xen/arch/arm/bootfdt.c rename to xen/common/device-tree/bootfdt.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/arm/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -XXX,XX +XXX,XX @@ static int __init 
early_scan_node(const void *fdt, { int rc = 0; - /* - * If Xen has been booted via UEFI, the memory banks are - * populated. So we should skip the parsing. - */ - if ( !efi_enabled(EFI_BOOT) && - device_tree_node_matches(fdt, node, "memory") ) + if ( device_tree_node_matches(fdt, node, "memory") ) +#if defined(CONFIG_ARM_EFI) + /* + * If Xen has been booted via UEFI, the memory banks are + * populated. So we should skip the parsing. + */ + if ( efi_enabled(EFI_BOOT) ) + return rc; +#endif rc = process_memory_node(fdt, node, name, depth, address_cells, size_cells, &bootinfo.mem); else if ( depth == 1 && !dt_node_cmp(name, "reserved-memory") ) -- 2.30.2
The early_print_info routine in bootfdt.c incorrectly stores the result of a call to fdt_num_mem_rsv() in an unsigned int, which results in the negative error code being interpreted incorrectly in a subsequent loop in the case where the device tree does not contain any memory reserve map entries. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/common/device-tree/bootfdt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -XXX,XX +XXX,XX @@ static void __init early_print_info(void) struct meminfo *mem_resv = &bootinfo.reserved_mem; struct bootmodules *mods = &bootinfo.modules; struct bootcmdlines *cmds = &bootinfo.cmdlines; - unsigned int i, j, nr_rsvd; + unsigned int i, j; + int nr_rsvd; for ( i = 0; i < mi->nr_banks; i++ ) printk("RAM: %"PRIpaddr" - %"PRIpaddr"\n", @@ -XXX,XX +XXX,XX @@ static void __init early_print_info(void) boot_module_kind_as_string(mods->module[i].kind)); nr_rsvd = fdt_num_mem_rsv(device_tree_flattened); - for ( i = 0; i < nr_rsvd; i++ ) + for ( i = 0; nr_rsvd > 0 && i < nr_rsvd; i++ ) { paddr_t s, e; -- 2.30.2
Move PPC off the asm-generic setup.h and enable usage of bootfdt for populating the boot info struct from the firmware-provided device tree. Also enable the Xen boot page allocator. Includes minor changes to bootfdt.c's boot_fdt_info() to tolerate the scenario in which the FDT overlaps a reserved memory region, as is the case on PPC when booted directly from skiboot. Also includes a minor change to record Xen's correct position on PPC, where Xen relocates itself at the entrypoint. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/include/asm/Makefile | 1 - xen/arch/ppc/include/asm/setup.h | 123 +++++++++++++ xen/arch/ppc/setup.c | 289 +++++++++++++++++++++++++++++- xen/common/device-tree/bootfdt.c | 25 ++- 4 files changed, 434 insertions(+), 4 deletions(-) create mode 100644 xen/arch/ppc/include/asm/setup.h diff --git a/xen/arch/ppc/include/asm/Makefile b/xen/arch/ppc/include/asm/Makefile index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/include/asm/Makefile +++ b/xen/arch/ppc/include/asm/Makefile @@ -XXX,XX +XXX,XX @@ generic-y += iocap.h generic-y += paging.h generic-y += percpu.h generic-y += random.h -generic-y += setup.h generic-y += static-shmem.h generic-y += vm_event.h diff --git a/xen/arch/ppc/include/asm/setup.h b/xen/arch/ppc/include/asm/setup.h new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/xen/arch/ppc/include/asm/setup.h @@ -XXX,XX +XXX,XX @@ +#ifndef __ASM_PPC_SETUP_H__ +#define __ASM_PPC_SETUP_H__ + +#define max_init_domid (0) + +#include <public/version.h> +#include <asm/p2m.h> +#include <xen/device_tree.h> + +#define MIN_FDT_ALIGN 8 +#define MAX_FDT_SIZE SZ_2M + +#define NR_MEM_BANKS 256 + +#define MAX_MODULES 32 /* Current maximum useful modules */ + +typedef enum { + BOOTMOD_XEN, + BOOTMOD_FDT, + BOOTMOD_KERNEL, + BOOTMOD_RAMDISK, + BOOTMOD_XSM, + BOOTMOD_GUEST_DTB, + BOOTMOD_UNKNOWN +} bootmodule_kind; + +enum membank_type { + /* + * The MEMBANK_DEFAULT type refers to either reserved memory for 
the + * device/firmware (when the bank is in 'reserved_mem') or any RAM (when + * the bank is in 'mem'). + */ + MEMBANK_DEFAULT, + /* + * The MEMBANK_STATIC_DOMAIN type is used to indicate whether the memory + * bank is bound to a static Xen domain. It is only valid when the bank + * is in reserved_mem. + */ + MEMBANK_STATIC_DOMAIN, + /* + * The MEMBANK_STATIC_HEAP type is used to indicate whether the memory + * bank is reserved as static heap. It is only valid when the bank is + * in reserved_mem. + */ + MEMBANK_STATIC_HEAP, +}; + +/* Indicates the maximum number of characters(\0 included) for shm_id */ +#define MAX_SHM_ID_LENGTH 16 + +struct membank { + paddr_t start; + paddr_t size; + enum membank_type type; +}; + +struct meminfo { + unsigned int nr_banks; + struct membank bank[NR_MEM_BANKS]; +}; + +/* + * The domU flag is set for kernels and ramdisks of "xen,domain" nodes. + * The purpose of the domU flag is to avoid getting confused in + * kernel_probe, where we try to guess which is the dom0 kernel and + * initrd to be compatible with all versions of the multiboot spec. 
+ */ +#define BOOTMOD_MAX_CMDLINE 1024 +struct bootmodule { + bootmodule_kind kind; + bool domU; + paddr_t start; + paddr_t size; +}; + +/* DT_MAX_NAME is the node name max length according the DT spec */ +#define DT_MAX_NAME 41 +struct bootcmdline { + bootmodule_kind kind; + bool domU; + paddr_t start; + char dt_name[DT_MAX_NAME]; + char cmdline[BOOTMOD_MAX_CMDLINE]; +}; + +struct bootmodules { + int nr_mods; + struct bootmodule module[MAX_MODULES]; +}; + +struct bootcmdlines { + unsigned int nr_mods; + struct bootcmdline cmdline[MAX_MODULES]; +}; + +struct bootinfo { + struct meminfo mem; + struct meminfo reserved_mem; + struct bootmodules modules; + struct bootcmdlines cmdlines; + bool static_heap; +}; + +extern struct bootinfo bootinfo; + +/* + * setup.c + */ + +bool check_reserved_regions_overlap(paddr_t region_start, paddr_t region_size); +struct bootmodule *add_boot_module(bootmodule_kind kind, + paddr_t start, paddr_t size, bool domU); +void add_boot_cmdline(const char *name, const char *cmdline, + bootmodule_kind kind, paddr_t start, bool domU); +const char *boot_module_kind_as_string(bootmodule_kind kind); +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind); + +/* + * bootfdt.c + */ +size_t boot_fdt_info(const void *fdt, paddr_t paddr); + +#endif /* __ASM_PPC_SETUP_H__ */ diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/setup.c +++ b/xen/arch/ppc/setup.c @@ -XXX,XX +XXX,XX @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include <xen/init.h> #include <xen/lib.h> +#include <xen/libfdt/libfdt.h> #include <xen/mm.h> #include <public/version.h> #include <asm/boot.h> #include <asm/early_printk.h> #include <asm/mm.h> #include <asm/processor.h> +#include <asm/setup.h> /* Xen stack for bringing up the first CPU. 
*/ unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE); +struct bootinfo __initdata bootinfo; + +void __init add_boot_cmdline(const char *name, const char *cmdline, + bootmodule_kind kind, paddr_t start, bool domU) +{ + struct bootcmdlines *cmds = &bootinfo.cmdlines; + struct bootcmdline *cmd; + + if ( cmds->nr_mods == MAX_MODULES ) + { + printk("Ignoring %s cmdline (too many)\n", name); + return; + } + + cmd = &cmds->cmdline[cmds->nr_mods++]; + cmd->kind = kind; + cmd->domU = domU; + cmd->start = start; + + ASSERT(strlen(name) <= DT_MAX_NAME); + safe_strcpy(cmd->dt_name, name); + + if ( strlen(cmdline) > BOOTMOD_MAX_CMDLINE ) + panic("module %s command line too long\n", name); + safe_strcpy(cmd->cmdline, cmdline); +} + +struct bootmodule __init *add_boot_module(bootmodule_kind kind, + paddr_t start, paddr_t size, + bool domU) +{ + struct bootmodules *mods = &bootinfo.modules; + struct bootmodule *mod; + unsigned int i; + + if ( mods->nr_mods == MAX_MODULES ) + { + printk("Ignoring %s boot module at %"PRIpaddr"-%"PRIpaddr" (too many)\n", + boot_module_kind_as_string(kind), start, start + size); + return NULL; + } + + if ( check_reserved_regions_overlap(start, size) ) + return NULL; + + for ( i = 0 ; i < mods->nr_mods ; i++ ) + { + mod = &mods->module[i]; + if ( mod->kind == kind && mod->start == start ) + { + if ( !domU ) + mod->domU = false; + return mod; + } + } + + mod = &mods->module[mods->nr_mods++]; + mod->kind = kind; + mod->start = start; + mod->size = size; + mod->domU = domU; + + return mod; +} + +const char * __init boot_module_kind_as_string(bootmodule_kind kind) +{ + switch ( kind ) + { + case BOOTMOD_XEN: return "Xen"; + case BOOTMOD_FDT: return "Device Tree"; + case BOOTMOD_KERNEL: return "Kernel"; + default: BUG(); + } +} + +/* + * TODO: '*_end' could be 0 if the module/region is at the end of the physical + * address space. This is for now not handled as it requires more rework. 
+ */ +static bool __init bootmodules_overlap_check(struct bootmodules *bootmodules, + paddr_t region_start, + paddr_t region_size) +{ + paddr_t mod_start = INVALID_PADDR, mod_end = 0; + paddr_t region_end = region_start + region_size; + unsigned int i, mod_num = bootmodules->nr_mods; + + for ( i = 0; i < mod_num; i++ ) + { + mod_start = bootmodules->module[i].start; + mod_end = mod_start + bootmodules->module[i].size; + + if ( region_end <= mod_start || region_start >= mod_end ) + continue; + else + { + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with" + " mod[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start, + region_end, i, mod_start, mod_end); + return true; + } + } + + return false; +} + +/* + * TODO: '*_end' could be 0 if the bank/region is at the end of the physical + * address space. This is for now not handled as it requires more rework. + */ +static bool __init meminfo_overlap_check(struct meminfo *meminfo, + paddr_t region_start, + paddr_t region_size) +{ + paddr_t bank_start = INVALID_PADDR, bank_end = 0; + paddr_t region_end = region_start + region_size; + unsigned int i, bank_num = meminfo->nr_banks; + + for ( i = 0; i < bank_num; i++ ) + { + bank_start = meminfo->bank[i].start; + bank_end = bank_start + meminfo->bank[i].size; + + if ( region_end <= bank_start || region_start >= bank_end ) + continue; + else + { + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with" + " bank[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start, + region_end, i, bank_start, bank_end); + return true; + } + } + + return false; +} + +/* + * Given an input physical address range, check if this range is overlapping + * with the existing reserved memory regions defined in bootinfo. + * Return true if the input physical address range is overlapping with any + * existing reserved memory regions, otherwise false. 
+ */ +bool __init check_reserved_regions_overlap(paddr_t region_start, + paddr_t region_size) +{ + /* Check if input region is overlapping with bootinfo.reserved_mem banks */ + if ( meminfo_overlap_check(&bootinfo.reserved_mem, + region_start, region_size) ) + return true; + + /* Check if input region is overlapping with bootmodules */ + if ( bootmodules_overlap_check(&bootinfo.modules, + region_start, region_size) ) + return true; + + return false; +} + +/* + * Return the end of the non-module region starting at s. In other + * words return s the start of the next modules after s. + * + * On input *end is the end of the region which should be considered + * and it is updated to reflect the end of the module, clipped to the + * end of the region if it would run over. + */ +static paddr_t __init next_module(paddr_t s, paddr_t *end) +{ + struct bootmodules *mi = &bootinfo.modules; + paddr_t lowest = ~(paddr_t)0; + int i; + + for ( i = 0; i < mi->nr_mods; i++ ) + { + paddr_t mod_s = mi->module[i].start; + paddr_t mod_e = mod_s + mi->module[i].size; + + if ( !mi->module[i].size ) + continue; + + if ( mod_s < s ) + continue; + if ( mod_s > lowest ) + continue; + if ( mod_s > *end ) + continue; + lowest = mod_s; + *end = min(*end, mod_e); + } + return lowest; +} + +static void __init dt_unreserved_regions(paddr_t s, paddr_t e, + void (*cb)(paddr_t ps, paddr_t pe), + unsigned int first) +{ + unsigned int i; + + for ( i = 0 ; i < bootinfo.reserved_mem.nr_banks; i++ ) + { + paddr_t r_s = bootinfo.reserved_mem.bank[i].start; + paddr_t r_e = r_s + bootinfo.reserved_mem.bank[i].size; + + if ( s < r_e && r_s < e ) + { + dt_unreserved_regions(r_e, e, cb, i + 1); + dt_unreserved_regions(s, r_s, cb, i + 1); + return; + } + } + + cb(s, e); +} + +/* + * boot_cmdline_find_by_kind can only be used to return Xen modules (e.g + * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest + * modules. 
+ */ +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind) +{ + struct bootcmdlines *cmds = &bootinfo.cmdlines; + struct bootcmdline *cmd; + int i; + + for ( i = 0 ; i < cmds->nr_mods ; i++ ) + { + cmd = &cmds->cmdline[i]; + if ( cmd->kind == kind && !cmd->domU ) + return cmd; + } + return NULL; +} + +/* + * Populate the boot allocator. Based on arch/arm/setup.c's + * populate_boot_allocator. + * All RAM but the following regions will be added to the boot allocator: + * - Modules (e.g., Xen, Kernel) + * - Reserved regions + */ +static void __init populate_boot_allocator(void) +{ + unsigned int i; + const struct meminfo *banks = &bootinfo.mem; + paddr_t s, e; + + for ( i = 0; i < banks->nr_banks; i++ ) + { + const struct membank *bank = &banks->bank[i]; + paddr_t bank_end = bank->start + bank->size; + + s = bank->start; + while ( s < bank_end ) + { + paddr_t n = bank_end; + + e = next_module(s, &n); + + if ( e == ~(paddr_t)0 ) + e = n = bank_end; + + /* + * Module in a RAM bank other than the one which we are + * not dealing with here. 
+ */ + if ( e > bank_end ) + e = bank_end; + + dt_unreserved_regions(s, e, init_boot_pages, 0); + + s = n; + } + } +} + void setup_exceptions(void) { unsigned long lpcr; @@ -XXX,XX +XXX,XX @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { + void *boot_fdt; + if ( r5 ) { /* Unsupported OpenFirmware boot protocol */ @@ -XXX,XX +XXX,XX @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, else { /* kexec boot protocol */ - boot_opal_init((void *)r3); + boot_fdt = (void *)r3; + boot_opal_init(boot_fdt); } setup_exceptions(); + boot_fdt_info(boot_fdt, r3); + + populate_boot_allocator(); + setup_initial_pagetables(); early_printk("Hello, ppc64le!\n"); diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) if ( ret < 0 ) panic("No valid device tree\n"); - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); - ret = device_tree_for_each_node((void *)fdt, 0, early_scan_node, NULL); if ( ret ) panic("Early FDT parsing failed (%d)\n", ret); + /* + * Add module for the FDT itself after the device tree has been parsed. This + * is required on ppc64le where the device tree passed to Xen may have been + * allocated by skiboot, in which case it will exist within a reserved + * region and this call will fail. This is fine, however, since either way + * the allocator will know not to step on the device tree. + */ + add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); + + /* + * Xen relocates itself at the ppc64 entrypoint, so we need to manually mark + * the kernel module. 
+ */ + if ( IS_ENABLED(CONFIG_PPC64) ) { + paddr_t xen_start, xen_end; + + xen_start = __pa(_start); + xen_end = PAGE_ALIGN(__pa(_end)); + if ( !add_boot_module(BOOTMOD_XEN, xen_start, xen_end, false) ) + panic("Xen overlaps reserved memory! %016lx - %016lx\n", xen_start, + xen_end); + } + /* * On Arm64 setup_directmap_mappings() expects to be called with the lowest * bank in memory first. There is no requirement that the DT will provide -- 2.30.2
Now that we have common code building, there's no need to keep the old itoa64_hex() and radix_dprint() debug printing helpers in mm-radix.c. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/mm-radix.c | 58 +++++++++-------------------------------- 1 file changed, 12 insertions(+), 46 deletions(-) diff --git a/xen/arch/ppc/mm-radix.c b/xen/arch/ppc/mm-radix.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/mm-radix.c +++ b/xen/arch/ppc/mm-radix.c @@ -XXX,XX +XXX,XX @@ void enable_mmu(void); +#ifdef NDEBUG +#define radix_dprintk(...) +#else +#define radix_dprintk(msg, ...) printk(XENLOG_DEBUG msg, ## __VA_ARGS__) +#endif + #define INITIAL_LVL1_PD_COUNT 1 #define INITIAL_LVL2_LVL3_PD_COUNT 2 #define INITIAL_LVL4_PT_COUNT 256 @@ -XXX,XX +XXX,XX @@ static __init struct lvl4_pt *lvl4_pt_pool_alloc(void) return &initial_lvl4_pt_pool[initial_lvl4_pt_pool_used++]; } -#ifndef NDEBUG -/* TODO: Remove once we get common/ building */ -static char *__init itoa64_hex(uint64_t val, char *out_buf, size_t buf_len) -{ - uint64_t cur; - size_t i = buf_len - 1; - - /* Null terminate buffer */ - out_buf[i] = '\0'; - - /* Add digits in reverse */ - cur = val; - while ( cur && i > 0 ) - { - out_buf[--i] = "0123456789ABCDEF"[cur % 16]; - cur /= 16; - } - - /* Pad to 16 digits */ - while ( i > 0 ) - out_buf[--i] = '0'; - - return out_buf + i; -} -#endif - -static void __init radix_dprint(uint64_t addr, const char *msg) -{ -#ifndef NDEBUG - char buf[sizeof("DEADBEEFCAFEBABA")]; - char *addr_s = itoa64_hex(addr, buf, sizeof(buf)); - - early_printk("(0x"); - early_printk(addr_s); - early_printk(") "); - early_printk(msg); -#endif -} - static void __init setup_initial_mapping(struct lvl1_pd *lvl1, vaddr_t map_start, vaddr_t map_end, @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, unsigned long paddr = (page_addr - map_start) + phys_base; unsigned long flags; - radix_dprint(paddr, "being mapped to "); - radix_dprint(page_addr, "!\n"); + 
radix_dprintk("%016lx being mapped to %016lx\n", paddr, page_addr); if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) { - radix_dprint(page_addr, "being marked as TEXT (RX)\n"); + radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); flags = PTE_XEN_RX; } else if ( is_kernel_rodata(page_addr) ) { - radix_dprint(page_addr, "being marked as RODATA (RO)\n"); + radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); flags = PTE_XEN_RO; } else { - radix_dprint(page_addr, "being marked as DEFAULT (RW)\n"); + radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); flags = PTE_XEN_RW; } *pte = paddr_to_pte(paddr, flags); - radix_dprint(paddr_to_pte(paddr, flags).pte, - "is result of PTE map!\n"); + radix_dprintk("%016lx is the result of PTE map\n", + paddr_to_pte(paddr, flags).pte); } else { -- 2.30.2
In the initial mm-radix implementation, the in-memory partition and process tables required to configure the MMU were allocated statically since the boot allocator was not yet available. Now that it is, allocate these tables at runtime and bump the size of the Process Table to its maximum supported value (on POWER9). Also bump the number of static LVL2/3 PD frames to tolerate cases where the boot allocator returns an address outside of the range of the LVL2 frame used for Xen. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- Changes in v2: - Bump LVL2/3 PD count to 3 to avoid running out in case the boot allocator returns a suitably high address. xen/arch/ppc/mm-radix.c | 169 +++++++++++++++++++++++----------------- 1 file changed, 97 insertions(+), 72 deletions(-) diff --git a/xen/arch/ppc/mm-radix.c b/xen/arch/ppc/mm-radix.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/mm-radix.c +++ b/xen/arch/ppc/mm-radix.c @@ -XXX,XX +XXX,XX @@ void enable_mmu(void); #endif #define INITIAL_LVL1_PD_COUNT 1 -#define INITIAL_LVL2_LVL3_PD_COUNT 2 +#define INITIAL_LVL2_LVL3_PD_COUNT 3 #define INITIAL_LVL4_PT_COUNT 256 static size_t __initdata initial_lvl1_pd_pool_used; @@ -XXX,XX +XXX,XX @@ static struct lvl2_pd initial_lvl2_lvl3_pd_pool[INITIAL_LVL2_LVL3_PD_COUNT]; static size_t __initdata initial_lvl4_pt_pool_used; static struct lvl4_pt initial_lvl4_pt_pool[INITIAL_LVL4_PT_COUNT]; -/* Only reserve minimum Partition and Process tables */ #define PATB_SIZE_LOG2 16 /* Only supported partition table size on POWER9 */ #define PATB_SIZE (1UL << PATB_SIZE_LOG2) -#define PRTB_SIZE_LOG2 12 +#define PRTB_SIZE_LOG2 24 /* Maximum process table size on POWER9 */ #define PRTB_SIZE (1UL << PRTB_SIZE_LOG2) -static struct patb_entry - __aligned(PATB_SIZE) initial_patb[PATB_SIZE / sizeof(struct patb_entry)]; - -static struct prtb_entry - __aligned(PRTB_SIZE) initial_prtb[PRTB_SIZE / sizeof(struct prtb_entry)]; +static struct patb_entry *initial_patb; +static struct 
prtb_entry *initial_prtb; static __init struct lvl1_pd *lvl1_pd_pool_alloc(void) { @@ -XXX,XX +XXX,XX @@ static __init struct lvl4_pt *lvl4_pt_pool_alloc(void) return &initial_lvl4_pt_pool[initial_lvl4_pt_pool_used++]; } +static void map_page_initial(struct lvl1_pd *lvl1, vaddr_t virt, paddr_t phys, + unsigned long flags) +{ + struct lvl2_pd *lvl2; + struct lvl3_pd *lvl3; + struct lvl4_pt *lvl4; + pde_t *pde; + pte_t *pte; + + /* Allocate LVL 2 PD if necessary */ + pde = pt_entry(lvl1, virt); + if ( !pde_is_valid(*pde) ) + { + lvl2 = lvl2_pd_pool_alloc(); + *pde = paddr_to_pde(__pa(lvl2), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_2); + } + else + lvl2 = __va(pde_to_paddr(*pde)); + + /* Allocate LVL 3 PD if necessary */ + pde = pt_entry(lvl2, virt); + if ( !pde_is_valid(*pde) ) + { + lvl3 = lvl3_pd_pool_alloc(); + *pde = paddr_to_pde(__pa(lvl3), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_3); + } + else + lvl3 = __va(pde_to_paddr(*pde)); + + /* Allocate LVL 4 PT if necessary */ + pde = pt_entry(lvl3, virt); + if ( !pde_is_valid(*pde) ) + { + lvl4 = lvl4_pt_pool_alloc(); + *pde = paddr_to_pde(__pa(lvl4), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_4); + } + else + lvl4 = __va(pde_to_paddr(*pde)); + + /* Finally, create PTE in LVL 4 PT */ + pte = pt_entry(lvl4, virt); + if ( !pte_is_valid(*pte) ) + { + radix_dprintk("%016lx being mapped to %016lx\n", phys, virt); + *pte = paddr_to_pte(phys, flags); + } + else + { + early_printk("BUG: Tried to create PTE for already-mapped page!"); + die(); + } +} + static void __init setup_initial_mapping(struct lvl1_pd *lvl1, vaddr_t map_start, vaddr_t map_end, @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, die(); } + /* Identity map Xen itself */ for ( page_addr = map_start; page_addr < map_end; page_addr += PAGE_SIZE ) { - struct lvl2_pd *lvl2; - struct lvl3_pd *lvl3; - struct lvl4_pt *lvl4; - pde_t *pde; - pte_t *pte; - - /* Allocate LVL 2 PD if necessary */ - pde = pt_entry(lvl1, page_addr); - if ( 
!pde_is_valid(*pde) ) - { - lvl2 = lvl2_pd_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl2), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_2); - } - else - lvl2 = __va(pde_to_paddr(*pde)); + unsigned long flags; - /* Allocate LVL 3 PD if necessary */ - pde = pt_entry(lvl2, page_addr); - if ( !pde_is_valid(*pde) ) + if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) { - lvl3 = lvl3_pd_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl3), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_3); + radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); + flags = PTE_XEN_RX; } - else - lvl3 = __va(pde_to_paddr(*pde)); - - /* Allocate LVL 4 PT if necessary */ - pde = pt_entry(lvl3, page_addr); - if ( !pde_is_valid(*pde) ) - { - lvl4 = lvl4_pt_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl4), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_4); - } - else - lvl4 = __va(pde_to_paddr(*pde)); - - /* Finally, create PTE in LVL 4 PT */ - pte = pt_entry(lvl4, page_addr); - if ( !pte_is_valid(*pte) ) + else if ( is_kernel_rodata(page_addr) ) { - unsigned long paddr = (page_addr - map_start) + phys_base; - unsigned long flags; - - radix_dprintk("%016lx being mapped to %016lx\n", paddr, page_addr); - if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) - { - radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); - flags = PTE_XEN_RX; - } - else if ( is_kernel_rodata(page_addr) ) - { - radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); - flags = PTE_XEN_RO; - } - else - { - radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); - flags = PTE_XEN_RW; - } - - *pte = paddr_to_pte(paddr, flags); - radix_dprintk("%016lx is the result of PTE map\n", - paddr_to_pte(paddr, flags).pte); + radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); + flags = PTE_XEN_RO; } else { - early_printk("BUG: Tried to create PTE for already-mapped page!"); - die(); + radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); + flags = PTE_XEN_RW; } + + 
map_page_initial(lvl1, page_addr, (page_addr - map_start) + phys_base, flags); + } + + /* Map runtime-allocated PATB, PRTB */ + for ( page_addr = (uint64_t)initial_patb; + page_addr < (uint64_t)initial_patb + PATB_SIZE; + page_addr += PAGE_SIZE ) + { + map_page_initial(lvl1, page_addr, __pa(page_addr), PTE_XEN_RW); + } + + for ( page_addr = (uint64_t)initial_prtb; + page_addr < (uint64_t)initial_prtb + PRTB_SIZE; + page_addr += PAGE_SIZE ) + { + map_page_initial(lvl1, page_addr, __pa(page_addr), PTE_XEN_RW); } } @@ -XXX,XX +XXX,XX @@ void __init setup_initial_pagetables(void) { struct lvl1_pd *root = lvl1_pd_pool_alloc(); unsigned long lpcr; + mfn_t patb_mfn, prtb_mfn; + + /* Allocate mfns for in-memory tables using the boot allocator */ + prtb_mfn = alloc_boot_pages(PRTB_SIZE / PAGE_SIZE, + max(1, PRTB_SIZE_LOG2 - PAGE_SHIFT)); + patb_mfn = alloc_boot_pages(PATB_SIZE / PAGE_SIZE, + max(1, PATB_SIZE_LOG2 - PAGE_SHIFT)); + + initial_patb = __va(mfn_to_maddr(patb_mfn)); + initial_prtb = __va(mfn_to_maddr(prtb_mfn)); setup_initial_mapping(root, (vaddr_t)_start, (vaddr_t)_end, __pa(_start)); -- 2.30.2
Hello all, This series enables the Xen boot time allocator on Power by parsing the available memory regions from the firmware-provided device tree. Thanks to Oleksii's work on my patches to move ARM's bootfdt code to common, v5 is much smaller. One new patch is included to fix some newly-broken behavior in bootfdt.c regarding handling of DT reserved memory maps. Thanks, Shawn Shawn Anastasio (3): xen/device-tree: Let DT reserve map entries overlap reserved-memory xen/ppc: Enable bootfdt and boot allocator xen/ppc: mm-radix: Allocate all paging structures at runtime xen/arch/ppc/mm-radix.c | 238 ++++++++++++++++-------------- xen/arch/ppc/setup.c | 20 ++- xen/common/device-tree/bootfdt.c | 37 ++++- xen/common/device-tree/bootinfo.c | 11 +- xen/include/xen/bootfdt.h | 3 +- 5 files changed, 191 insertions(+), 118 deletions(-) -- 2.30.2
Commit 53dc37829c31 ("xen/arm: Add DT reserve map regions to bootinfo.reserved_mem") changes the way reserve map regions are tracked, and as a result broke bootfdt's ability to handle device trees in which the reserve map and the `reserved-memory` node contain the same entries as each other, as is the case on PPC when booted by skiboot. Fix this behavior by moving the reserve map check to after the DT has been parsed and by explicitly allowing overlap with entries created by `reserved-memory` nodes. Fixes: 53dc37829c31 ("xen/arm: Add DT reserve map regions to bootinfo.reserved_mem") Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/common/device-tree/bootfdt.c | 28 +++++++++++++++++++++++----- xen/common/device-tree/bootinfo.c | 11 +++++++++-- xen/include/xen/bootfdt.h | 3 ++- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -XXX,XX +XXX,XX @@ static int __init device_tree_get_meminfo(const void *fdt, int node, { device_tree_get_reg(&cell, address_cells, size_cells, &start, &size); if ( mem == bootinfo_get_reserved_mem() && - check_reserved_regions_overlap(start, size) ) + check_reserved_regions_overlap(start, size, NULL) ) return -EINVAL; /* Some DT may describe empty bank, ignore them */ if ( !size ) @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) if ( nr_rsvd < 0 ) panic("Parsing FDT memory reserve map failed (%d)\n", nr_rsvd); + ret = device_tree_for_each_node(fdt, 0, early_scan_node, NULL); + if ( ret ) + panic("Early FDT parsing failed (%d)\n", ret); + for ( i = 0; i < nr_rsvd; i++ ) { + const struct membanks *overlap = NULL; struct membank *bank; paddr_t s, sz; if ( fdt_get_mem_rsv_paddr(device_tree_flattened, i, &s, &sz) < 0 ) continue; + if ( check_reserved_regions_overlap(s, sz, &overlap) ) + { + if ( overlap 
== bootinfo_get_reserved_mem() ) + { + /* + * Some valid device trees, such as those generated by OpenPOWER + * skiboot firmware, expose all reserved memory regions in the + * FDT memory reservation block (here) AND in the + * reserved-memory node which has already been parsed. Thus, any + * overlaps in the mem_reserved banks should be ignored. + */ + continue; + } + else + panic("FDT reserve map overlapped with membanks/modules\n"); + } + if ( reserved_mem->nr_banks < reserved_mem->max_banks ) { bank = &reserved_mem->bank[reserved_mem->nr_banks]; @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) panic("Cannot allocate reserved memory bank\n"); } - ret = device_tree_for_each_node(fdt, 0, early_scan_node, NULL); - if ( ret ) - panic("Early FDT parsing failed (%d)\n", ret); - /* * On Arm64 setup_directmap_mappings() expects to be called with the lowest * bank in memory first. There is no requirement that the DT will provide diff --git a/xen/common/device-tree/bootinfo.c b/xen/common/device-tree/bootinfo.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootinfo.c +++ b/xen/common/device-tree/bootinfo.c @@ -XXX,XX +XXX,XX @@ void __init fw_unreserved_regions(paddr_t s, paddr_t e, * existing reserved memory regions, otherwise false. 
*/ bool __init check_reserved_regions_overlap(paddr_t region_start, - paddr_t region_size) + paddr_t region_size, + const struct membanks **out_overlapping_membanks) { const struct membanks *mem_banks[] = { bootinfo_get_reserved_mem(), @@ -XXX,XX +XXX,XX @@ bool __init check_reserved_regions_overlap(paddr_t region_start, * shared memory banks (when static shared memory feature is enabled) */ for ( i = 0; i < ARRAY_SIZE(mem_banks); i++ ) + { if ( meminfo_overlap_check(mem_banks[i], region_start, region_size) ) + { + if ( out_overlapping_membanks ) + *out_overlapping_membanks = mem_banks[i]; return true; + } + } /* Check if input region is overlapping with bootmodules */ if ( bootmodules_overlap_check(&bootinfo.modules, @@ -XXX,XX +XXX,XX @@ struct bootmodule __init *add_boot_module(bootmodule_kind kind, return NULL; } - if ( check_reserved_regions_overlap(start, size) ) + if ( check_reserved_regions_overlap(start, size, NULL) ) return NULL; for ( i = 0 ; i < mods->nr_mods ; i++ ) diff --git a/xen/include/xen/bootfdt.h b/xen/include/xen/bootfdt.h index XXXXXXX..XXXXXXX 100644 --- a/xen/include/xen/bootfdt.h +++ b/xen/include/xen/bootfdt.h @@ -XXX,XX +XXX,XX @@ struct bootinfo { extern struct bootinfo bootinfo; bool check_reserved_regions_overlap(paddr_t region_start, - paddr_t region_size); + paddr_t region_size, + const struct membanks **out_overlapping_membanks); struct bootmodule *add_boot_module(bootmodule_kind kind, paddr_t start, paddr_t size, bool domU); -- 2.30.2
Enable usage of bootfdt for populating the boot info struct from the firmware-provided device tree. Also enable the Xen boot page allocator. Additionally, modify bootfdt.c's boot_fdt_info() to tolerate the scenario in which the FDT overlaps a reserved memory region, as is the case on PPC when booted directly from skiboot. Since this means that Xen can now boot without a BOOTMOD_FDT present in bootinfo, clarify this fact in a comment above BOOTMOD_FDT's definition. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> Acked-by: Julien Grall <jgrall@amazon.com> --- Changes in v5: - Drop setup.c's unnecessary `boot_fdt` variable per Julien's suggestion xen/arch/ppc/setup.c | 20 +++++++++++++++++++- xen/common/device-tree/bootfdt.c | 11 +++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/setup.c +++ b/xen/arch/ppc/setup.c @@ -XXX,XX +XXX,XX @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <xen/bootfdt.h> +#include <xen/device_tree.h> #include <xen/init.h> #include <xen/lib.h> #include <xen/mm.h> @@ -XXX,XX +XXX,XX @@ #include <asm/early_printk.h> #include <asm/mm.h> #include <asm/processor.h> +#include <asm/setup.h> /* Xen stack for bringing up the first CPU. 
*/ unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE); @@ -XXX,XX +XXX,XX @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { + const struct bootmodule *xen_bootmodule; + if ( r5 ) { /* Unsupported OpenFirmware boot protocol */ @@ -XXX,XX +XXX,XX @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, else { /* kexec boot protocol */ - boot_opal_init((void *)r3); + device_tree_flattened = (void *)r3; + boot_opal_init(device_tree_flattened); } setup_exceptions(); + boot_fdt_info(device_tree_flattened, r3); + + /* + * Xen relocates itself at the ppc64 entrypoint, so we need to manually mark + * the kernel module. + */ + xen_bootmodule = add_boot_module(BOOTMOD_XEN, __pa(_start), + PAGE_ALIGN(__pa(_end)), false); + BUG_ON(!xen_bootmodule); + + populate_boot_allocator(); + setup_initial_pagetables(); init_constructors(); diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c index XXXXXXX..XXXXXXX 100644 --- a/xen/common/device-tree/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) if ( ret < 0 ) panic("No valid device tree\n"); - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); - nr_rsvd = fdt_num_mem_rsv(fdt); if ( nr_rsvd < 0 ) panic("Parsing FDT memory reserve map failed (%d)\n", nr_rsvd); @@ -XXX,XX +XXX,XX @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) panic("Cannot allocate reserved memory bank\n"); } + /* + * Add module for the FDT itself after the device tree has been parsed. This + * is required on ppc64le where the device tree passed to Xen may have been + * allocated by skiboot, in which case it will exist within a reserved + * region and this call will fail. This is fine, however, since either way + * the allocator will know not to step on the device tree. 
+ */ + (void)add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); + /* * On Arm64 setup_directmap_mappings() expects to be called with the lowest * bank in memory first. There is no requirement that the DT will provide -- 2.30.2
In the initial mm-radix implementation, the in-memory partition and process tables required to configure the MMU, as well as the page tables themselves, were all allocated statically since the boot allocator was not yet available. Now that it is, allocate these structures at runtime and bump the size of the Process Table to its maximum supported value (on POWER9). Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- Changes in v5: - Add more clarification comments to min/max mfn variable decls. Changes in v4: - use mfn_add in initial_page_alloc() - zero pages returned by initial_page_alloc() xen/arch/ppc/mm-radix.c | 238 ++++++++++++++++++++++------------------ 1 file changed, 130 insertions(+), 108 deletions(-) diff --git a/xen/arch/ppc/mm-radix.c b/xen/arch/ppc/mm-radix.c index XXXXXXX..XXXXXXX 100644 --- a/xen/arch/ppc/mm-radix.c +++ b/xen/arch/ppc/mm-radix.c @@ -XXX,XX +XXX,XX @@ void enable_mmu(void); #define radix_dprintk(...) #endif -#define INITIAL_LVL1_PD_COUNT 1 -#define INITIAL_LVL2_LVL3_PD_COUNT 2 -#define INITIAL_LVL4_PT_COUNT 256 - -static size_t __initdata initial_lvl1_pd_pool_used; -static struct lvl1_pd initial_lvl1_pd_pool[INITIAL_LVL1_PD_COUNT]; - -static size_t __initdata initial_lvl2_lvl3_pd_pool_used; -static struct lvl2_pd initial_lvl2_lvl3_pd_pool[INITIAL_LVL2_LVL3_PD_COUNT]; - -static size_t __initdata initial_lvl4_pt_pool_used; -static struct lvl4_pt initial_lvl4_pt_pool[INITIAL_LVL4_PT_COUNT]; - -/* Only reserve minimum Partition and Process tables */ #define PATB_SIZE_LOG2 16 /* Only supported partition table size on POWER9 */ #define PATB_SIZE (1UL << PATB_SIZE_LOG2) -#define PRTB_SIZE_LOG2 12 +#define PRTB_SIZE_LOG2 24 /* Maximum process table size on POWER9 */ #define PRTB_SIZE (1UL << PRTB_SIZE_LOG2) -static struct patb_entry - __aligned(PATB_SIZE) initial_patb[PATB_SIZE / sizeof(struct patb_entry)]; +static struct patb_entry *initial_patb; +static struct prtb_entry *initial_prtb; -static struct prtb_entry - 
__aligned(PRTB_SIZE) initial_prtb[PRTB_SIZE / sizeof(struct prtb_entry)]; +/* + * The highest and lowest mfns returned by initial_page_alloc. In order to + * bootstrap Xen's initial page tables, the mfns allocated to store the + * paging structures need to be tracked so that they can be included in the + * mapping. This is most simply accomplished by keeping track of the minimum + * and maximum mfn numbers so that the whole range from min->max can be mapped. + */ +static mfn_t __initdata min_alloc_mfn = {-1}; +static mfn_t __initdata max_alloc_mfn = {0}; -static __init struct lvl1_pd *lvl1_pd_pool_alloc(void) +/* + * A thin wrapper for alloc_boot_pages that keeps track of the maximum and + * minimum mfns that have been allocated. This information is used by + * setup_initial_mapping to include the allocated pages in the initial + * page mapping. + * + * Additionally, allocated pages are zeroed before return. + */ +static mfn_t __init initial_page_alloc(unsigned long nr_pfns, + unsigned long pfn_align) { - if ( initial_lvl1_pd_pool_used >= INITIAL_LVL1_PD_COUNT ) - { - early_printk("Ran out of space for LVL1 PD!\n"); - die(); - } + mfn_t mfn_first, mfn_last; - return &initial_lvl1_pd_pool[initial_lvl1_pd_pool_used++]; -} + mfn_first = alloc_boot_pages(nr_pfns, pfn_align); + mfn_last = mfn_add(mfn_first, nr_pfns - 1); -static __init struct lvl2_pd *lvl2_pd_pool_alloc(void) -{ - if ( initial_lvl2_lvl3_pd_pool_used >= INITIAL_LVL2_LVL3_PD_COUNT ) - { - early_printk("Ran out of space for LVL2/3 PD!\n"); - die(); - } + min_alloc_mfn = _mfn(min(mfn_x(min_alloc_mfn), mfn_x(mfn_first))); + max_alloc_mfn = _mfn(max(mfn_x(max_alloc_mfn), mfn_x(mfn_last))); + + memset(__va(mfn_to_maddr(mfn_first)), 0, nr_pfns << PAGE_SHIFT); - return &initial_lvl2_lvl3_pd_pool[initial_lvl2_lvl3_pd_pool_used++]; + return mfn_first; } -static __init struct lvl3_pd *lvl3_pd_pool_alloc(void) +static __init void *initial_pd_pt_alloc(void) { - BUILD_BUG_ON(sizeof(struct lvl3_pd) != sizeof(struct 
lvl2_pd)); + BUILD_BUG_ON(sizeof(struct lvl1_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl2_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl3_pd) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct lvl4_pt) > PAGE_SIZE); - return (struct lvl3_pd *) lvl2_pd_pool_alloc(); + return __va(mfn_to_maddr(initial_page_alloc(1, 1))); } -static __init struct lvl4_pt *lvl4_pt_pool_alloc(void) +static void map_page_initial(struct lvl1_pd *lvl1, vaddr_t virt, paddr_t phys, + unsigned long flags) { - if ( initial_lvl4_pt_pool_used >= INITIAL_LVL4_PT_COUNT ) + struct lvl2_pd *lvl2; + struct lvl3_pd *lvl3; + struct lvl4_pt *lvl4; + pde_t *pde; + pte_t *pte; + + /* Allocate LVL 2 PD if necessary */ + pde = pt_entry(lvl1, virt); + if ( !pde_is_valid(*pde) ) { - early_printk("Ran out of space for LVL4 PT!\n"); - die(); + lvl2 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl2), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_2); } + else + lvl2 = __va(pde_to_paddr(*pde)); - return &initial_lvl4_pt_pool[initial_lvl4_pt_pool_used++]; + /* Allocate LVL 3 PD if necessary */ + pde = pt_entry(lvl2, virt); + if ( !pde_is_valid(*pde) ) + { + lvl3 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl3), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_3); + } + else + lvl3 = __va(pde_to_paddr(*pde)); + + /* Allocate LVL 4 PT if necessary */ + pde = pt_entry(lvl3, virt); + if ( !pde_is_valid(*pde) ) + { + lvl4 = initial_pd_pt_alloc(); + *pde = paddr_to_pde(__pa(lvl4), PDE_VALID, + XEN_PT_ENTRIES_LOG2_LVL_4); + } + else + lvl4 = __va(pde_to_paddr(*pde)); + + /* Finally, create PTE in LVL 4 PT */ + pte = pt_entry(lvl4, virt); + if ( !pte_is_valid(*pte) ) + { + radix_dprintk("%016lx being mapped to %016lx\n", phys, virt); + *pte = paddr_to_pte(phys, flags); + } + else + { + early_printk("BUG: Tried to create PTE for already-mapped page!"); + die(); + } } static void __init setup_initial_mapping(struct lvl1_pd *lvl1, @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, paddr_t 
phys_base) { uint64_t page_addr; + mfn_t previous_max_alloc_mfn; if ( map_start & ~PAGE_MASK ) { @@ -XXX,XX +XXX,XX @@ static void __init setup_initial_mapping(struct lvl1_pd *lvl1, die(); } + /* Identity map Xen itself */ for ( page_addr = map_start; page_addr < map_end; page_addr += PAGE_SIZE ) { - struct lvl2_pd *lvl2; - struct lvl3_pd *lvl3; - struct lvl4_pt *lvl4; - pde_t *pde; - pte_t *pte; - - /* Allocate LVL 2 PD if necessary */ - pde = pt_entry(lvl1, page_addr); - if ( !pde_is_valid(*pde) ) - { - lvl2 = lvl2_pd_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl2), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_2); - } - else - lvl2 = __va(pde_to_paddr(*pde)); - - /* Allocate LVL 3 PD if necessary */ - pde = pt_entry(lvl2, page_addr); - if ( !pde_is_valid(*pde) ) - { - lvl3 = lvl3_pd_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl3), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_3); - } - else - lvl3 = __va(pde_to_paddr(*pde)); + unsigned long flags; - /* Allocate LVL 4 PT if necessary */ - pde = pt_entry(lvl3, page_addr); - if ( !pde_is_valid(*pde) ) + if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) { - lvl4 = lvl4_pt_pool_alloc(); - *pde = paddr_to_pde(__pa(lvl4), PDE_VALID, - XEN_PT_ENTRIES_LOG2_LVL_4); + radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); + flags = PTE_XEN_RX; } - else - lvl4 = __va(pde_to_paddr(*pde)); - - /* Finally, create PTE in LVL 4 PT */ - pte = pt_entry(lvl4, page_addr); - if ( !pte_is_valid(*pte) ) + else if ( is_kernel_rodata(page_addr) ) { - unsigned long paddr = (page_addr - map_start) + phys_base; - unsigned long flags; - - radix_dprintk("%016lx being mapped to %016lx\n", paddr, page_addr); - if ( is_kernel_text(page_addr) || is_kernel_inittext(page_addr) ) - { - radix_dprintk("%016lx being marked as TEXT (RX)\n", page_addr); - flags = PTE_XEN_RX; - } - else if ( is_kernel_rodata(page_addr) ) - { - radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); - flags = PTE_XEN_RO; - } - else - { - 
radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); - flags = PTE_XEN_RW; - } - - *pte = paddr_to_pte(paddr, flags); - radix_dprintk("%016lx is the result of PTE map\n", - paddr_to_pte(paddr, flags).pte); + radix_dprintk("%016lx being marked as RODATA (RO)\n", page_addr); + flags = PTE_XEN_RO; } else { - early_printk("BUG: Tried to create PTE for already-mapped page!"); - die(); + radix_dprintk("%016lx being marked as DEFAULT (RW)\n", page_addr); + flags = PTE_XEN_RW; } + + map_page_initial(lvl1, page_addr, (page_addr - map_start) + phys_base, flags); } + + previous_max_alloc_mfn = max_alloc_mfn; + + /* + * Identity map all pages we've allocated for paging structures. This act + * itself will allocate more pages, so continue until we've mapped from + * `max_alloc_mfn` down to `min_alloc_mfn`. This assumes that the heap grows + * downwards, which matches the behavior of alloc_boot_pages. + */ + for ( page_addr = (vaddr_t)__va(mfn_to_maddr(max_alloc_mfn)); + mfn_to_maddr(min_alloc_mfn) <= __pa(page_addr); + page_addr -= PAGE_SIZE) + { + map_page_initial(lvl1, page_addr, __pa(page_addr), PTE_XEN_RW); + } + + if ( mfn_x(previous_max_alloc_mfn) != mfn_x(max_alloc_mfn) ) + panic("Early page heap unexpectedly grew upwards\n"); } static void __init setup_partition_table(struct lvl1_pd *root) @@ -XXX,XX +XXX,XX @@ static void __init setup_process_table(struct lvl1_pd *root) void __init setup_initial_pagetables(void) { - struct lvl1_pd *root = lvl1_pd_pool_alloc(); + struct lvl1_pd *root; unsigned long lpcr; + mfn_t patb_mfn, prtb_mfn; + + /* Allocate mfns for in-memory tables using the boot allocator */ + prtb_mfn = initial_page_alloc(PRTB_SIZE / PAGE_SIZE, + 1 << (PRTB_SIZE_LOG2 - PAGE_SHIFT)); + patb_mfn = initial_page_alloc(PATB_SIZE / PAGE_SIZE, + 1 << (PATB_SIZE_LOG2 - PAGE_SHIFT)); + + initial_patb = __va(mfn_to_maddr(patb_mfn)); + initial_prtb = __va(mfn_to_maddr(prtb_mfn)); + /* Allocate and create page tables */ + root = initial_pd_pt_alloc(); 
setup_initial_mapping(root, (vaddr_t)_start, (vaddr_t)_end, __pa(_start)); /* Enable Radix mode in LPCR */ -- 2.30.2