Use the new brk_alloc() instead, with ebmalloc() merely being a thin
wrapper.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
I'm not quite certain whether we ought to permit non-page-granular
reservations. The in-memory image being somewhat larger due to possibly
excessive padding isn't really a big problem, I think.
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -262,7 +262,7 @@ quiet_cmd_obj_init_o = INIT_O $@
define cmd_obj_init_o
$(OBJDUMP) -h $< | while read idx name sz rest; do \
case "$$name" in \
- .*.local) ;; \
+ .*.local|.bss..brk*) ;; \
.text|.text.*|.data|.data.*|.bss|.bss.*) \
test $$(echo $$sz | sed 's,00*,0,') != 0 || continue; \
echo "Error: size of $<:$$name is 0x$$sz" >&2; \
--- a/xen/arch/x86/efi/efi-boot.h
+++ b/xen/arch/x86/efi/efi-boot.h
@@ -10,6 +10,7 @@
#include <xen/vga.h>
#include <asm/boot-helpers.h>
+#include <asm/brk.h>
#include <asm/e820.h>
#include <asm/edd.h>
#include <asm/microcode.h>
@@ -119,6 +120,18 @@ static void __init relocate_trampoline(u
reloc_trampoline64();
}
+DEFINE_BRK(efi, MB(1));
+
+static void *__init ebmalloc(size_t size)
+{
+ void *ptr = brk_alloc(size);
+
+ if ( !ptr )
+ blexit(L"Out of BRK memory\r\n");
+
+ return ptr;
+}
+
static void __init place_string(u32 *addr, const char *s)
{
char *alloc = NULL;
--- a/xen/arch/x86/efi/stub.c
+++ b/xen/arch/x86/efi/stub.c
@@ -41,12 +41,4 @@ void __init noreturn efi_multiboot2(EFI_
void __init efi_init_memory(void) { }
-bool efi_boot_mem_unused(unsigned long *start, unsigned long *end)
-{
- /* FIXME: Simplify once the call here with two NULLs goes away. */
- if ( start || end )
- *start = *end = (unsigned long)_end;
- return false;
-}
-
void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t l4e) { }
--- a/xen/arch/x86/include/asm/brk.h
+++ b/xen/arch/x86/include/asm/brk.h
@@ -2,6 +2,10 @@
#include <xen/types.h>
+#define DEFINE_BRK(var, size) \
+ static char __section(".bss..brk.page_aligned") __aligned(PAGE_SIZE) \
+ __used var ## _brk_[size]
+
void *brk_alloc(size_t size);
unsigned long brk_get_unused_start(void);
void brk_free_unused(void);
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -9,6 +9,8 @@ extern const char __2M_rodata_start[], _
extern char __2M_init_start[], __2M_init_end[];
extern char __2M_rwdata_start[], __2M_rwdata_end[];
+extern unsigned long brk_end;
+
extern unsigned long xenheap_initial_phys_start;
extern uint64_t boot_tsc_stamp;
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -31,6 +31,7 @@
#include <asm/alternative.h>
#include <asm/apic.h>
#include <asm/bootinfo.h>
+#include <asm/brk.h>
#include <asm/bzimage.h>
#include <asm/cpu-policy.h>
#include <asm/e820.h>
@@ -163,6 +164,8 @@ cpumask_t __read_mostly cpu_present_map;
unsigned long __read_mostly xen_phys_start;
+unsigned long __ro_after_init brk_end;
+
/* Only used in asm code and within this source file */
char asmlinkage __section(".init.bss.stack_aligned") __aligned(STACK_SIZE)
cpu0_stack[STACK_SIZE];
@@ -1109,7 +1112,6 @@ void asmlinkage __init noreturn __start_
struct boot_info *bi;
unsigned long nr_pages, raw_max_page;
int i, j, bytes = 0;
- unsigned long eb_start, eb_end;
bool acpi_boot_table_init_done = false, relocated = false;
bool vm_init_done = false;
int ret;
@@ -1473,7 +1475,7 @@ void asmlinkage __init noreturn __start_
/*
* This needs to remain in sync with remove_xen_ranges() and the
* respective reserve_e820_ram() invocation below. No need to
- * query efi_boot_mem_unused() here, though.
+ * query brk_get_unused_start() here, though.
*/
xen->start = virt_to_maddr(_stext);
xen->size = __2M_rwdata_end - _stext;
@@ -1616,18 +1618,11 @@ void asmlinkage __init noreturn __start_
if ( !xen_phys_start )
panic("Not enough memory to relocate Xen\n");
- /* FIXME: Putting a hole in .bss would shatter the large page mapping. */
- if ( using_2M_mapping() )
- efi_boot_mem_unused(NULL, NULL);
-
/* This needs to remain in sync with remove_xen_ranges(). */
- if ( efi_boot_mem_unused(&eb_start, &eb_end) )
- {
- reserve_e820_ram(&boot_e820, __pa(_stext), __pa(eb_start));
- reserve_e820_ram(&boot_e820, __pa(eb_end), __pa(__2M_rwdata_end));
- }
- else
- reserve_e820_ram(&boot_e820, __pa(_stext), __pa(__2M_rwdata_end));
+ brk_end = brk_get_unused_start();
+ if ( using_2M_mapping() )
+ brk_end = PAGE_ALIGN_2M(brk_end);
+ reserve_e820_ram(&boot_e820, __pa(_stext), __pa(brk_end));
/* Late kexec reservation (dynamic start address). */
kexec_reserve_area();
@@ -2231,7 +2226,6 @@ __initcall(init_xen_cap_info);
int __hwdom_init remove_xen_ranges(struct rangeset *r)
{
- paddr_t start, end;
int rc;
/* S3 resume code (and other real mode trampoline code) */
@@ -2253,26 +2247,10 @@ int __hwdom_init remove_xen_ranges(struc
return rc;
/* hypervisor .data + .bss */
- if ( efi_boot_mem_unused(&start, &end) )
- {
- ASSERT(__pa(start) >= __pa(&__2M_rwdata_start));
- rc = rangeset_remove_range(r, PFN_DOWN(__pa(&__2M_rwdata_start)),
- PFN_DOWN(__pa(start) - 1));
- if ( rc )
- return rc;
- ASSERT(__pa(end) <= __pa(&__2M_rwdata_end));
- rc = rangeset_remove_range(r, PFN_DOWN(__pa(end)),
- PFN_DOWN(__pa(&__2M_rwdata_end) - 1));
- if ( rc )
- return rc;
- }
- else
- {
- rc = rangeset_remove_range(r, PFN_DOWN(__pa(&__2M_rwdata_start)),
- PFN_DOWN(__pa(&__2M_rwdata_end) - 1));
- if ( rc )
- return rc;
- }
+ rc = rangeset_remove_range(r, PFN_DOWN(__pa(&__2M_rwdata_start)),
+ PFN_DOWN(__pa(brk_end) - 1));
+ if ( rc )
+ return rc;
return 0;
}
--- a/xen/arch/x86/tboot.c
+++ b/xen/arch/x86/tboot.c
@@ -321,8 +321,6 @@ void tboot_shutdown(uint32_t shutdown_ty
/* if this is S3 then set regions to MAC */
if ( shutdown_type == TB_SHUTDOWN_S3 )
{
- unsigned long s, e;
-
/*
* Xen regions for tboot to MAC. This needs to remain in sync with
* remove_xen_ranges().
@@ -336,16 +334,8 @@ void tboot_shutdown(uint32_t shutdown_ty
g_tboot_shared->mac_regions[1].size = __2M_rodata_end - _stext;
/* hypervisor .data + .bss */
g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__2M_rwdata_start);
- g_tboot_shared->mac_regions[2].size = __2M_rwdata_end - __2M_rwdata_start;
- if ( efi_boot_mem_unused(&s, &e) )
- {
- g_tboot_shared->mac_regions[2].size =
- s - (unsigned long)__2M_rwdata_start;
- g_tboot_shared->mac_regions[3].start = __pa(e);
- g_tboot_shared->mac_regions[3].size =
- (unsigned long)__2M_rwdata_end - e;
- g_tboot_shared->num_mac_regions = 4;
- }
+ g_tboot_shared->mac_regions[2].size =
+ brk_end - (unsigned long)__2M_rwdata_start;
/*
* MAC domains and other Xen memory
--- a/xen/common/efi/boot.c
+++ b/xen/common/efi/boot.c
@@ -1788,8 +1788,6 @@ void __init efi_init_memory(void)
pte_attr_t prot;
} *extra, *extra_head = NULL;
- free_ebmalloc_unused_mem();
-
if ( !efi_enabled(EFI_BOOT) )
return;
--- a/xen/common/efi/ebmalloc.c
+++ /dev/null
@@ -1,74 +0,0 @@
-#include "efi.h"
-#include <xen/init.h>
-#include <xen/mm.h>
-
-#ifdef CONFIG_ARM
-/*
- * TODO: Enable EFI boot allocator on ARM.
- * This code can be common for x86 and ARM.
- * Things TODO on ARM before enabling ebmalloc:
- * - estimate required EBMALLOC_SIZE value,
- * - where (in which section) ebmalloc_mem[] should live; if in
- * .bss.page_aligned, as it is right now, then whole BSS zeroing
- * have to be disabled in xen/arch/arm/arm64/head.S; though BSS
- * should be initialized somehow before use of variables living there,
- * - use ebmalloc() in ARM/common EFI boot code,
- * - call free_ebmalloc_unused_mem() somewhere in init code.
- */
-#define EBMALLOC_SIZE MB(0)
-#else
-#define EBMALLOC_SIZE MB(1)
-#endif
-
-static char __section(".bss.page_aligned") __aligned(PAGE_SIZE)
- ebmalloc_mem[EBMALLOC_SIZE];
-static unsigned long __read_mostly ebmalloc_allocated;
-
-/* EFI boot allocator. */
-void __init *ebmalloc(size_t size)
-{
- void *ptr = ebmalloc_mem + ebmalloc_allocated;
-
- ebmalloc_allocated += ROUNDUP(size, sizeof(void *));
-
- if ( ebmalloc_allocated > sizeof(ebmalloc_mem) )
- blexit(L"Out of static memory\r\n");
-
- return ptr;
-}
-
-bool efi_boot_mem_unused(unsigned long *start, unsigned long *end)
-{
- /* FIXME: Drop once the call here with two NULLs goes away. */
- if ( !start && !end )
- {
- ebmalloc_allocated = sizeof(ebmalloc_mem);
- return false;
- }
-
- *start = (unsigned long)ebmalloc_mem + PAGE_ALIGN(ebmalloc_allocated);
- *end = (unsigned long)ebmalloc_mem + sizeof(ebmalloc_mem);
-
- return *start < *end;
-}
-
-void __init free_ebmalloc_unused_mem(void)
-{
- unsigned long start, end;
-
- if ( !efi_boot_mem_unused(&start, &end) )
- return;
-
- destroy_xen_mappings(start, end);
-
-#ifdef CONFIG_X86
- /*
- * By reserving the space early in the E820 map, it gets freed way before
- * we make it here. Don't free the range a 2nd time.
- */
-#else
- init_xenheap_pages(__pa(start), __pa(end));
-#endif
-
- printk(XENLOG_INFO "Freed %lukB unused BSS memory\n", (end - start) >> 10);
-}
--- a/xen/common/efi/efi-common.mk
+++ b/xen/common/efi/efi-common.mk
@@ -1,4 +1,4 @@
-EFIOBJ-y := boot.init.o pe.init.o ebmalloc.o runtime.o
+EFIOBJ-y := boot.init.o pe.init.o runtime.o
EFIOBJ-$(CONFIG_COMPAT) += compat.o
CFLAGS-y += -fshort-wchar
--- a/xen/common/efi/efi.h
+++ b/xen/common/efi/efi.h
@@ -48,10 +48,6 @@ void noreturn blexit(const CHAR16 *str);
const CHAR16 *wmemchr(const CHAR16 *s, CHAR16 c, UINTN n);
-/* EFI boot allocator. */
-void *ebmalloc(size_t size);
-void free_ebmalloc_unused_mem(void);
-
const void *pe_find_section(const void *image, const UINTN image_size,
const CHAR16 *section_name, UINTN *size_out);
--- a/xen/include/xen/efi.h
+++ b/xen/include/xen/efi.h
@@ -39,7 +39,6 @@ static inline bool efi_enabled(unsigned
extern bool efi_secure_boot;
void efi_init_memory(void);
-bool efi_boot_mem_unused(unsigned long *start, unsigned long *end);
bool efi_rs_using_pgtables(void);
unsigned long efi_get_time(void);
void efi_halt_system(void);
On Thu, Nov 13, 2025 at 12:09:37PM +0100, Jan Beulich wrote:
> Use the new brk_alloc() instead, with ebmalloc() merely being a thin
> wrapper.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> I'm not quite certain whether we ought to permit non-page-granular
> reservations. The in-memory image being somewhat larger due to possibly
> excessive padding isn't really a big problem, I think.
My grep says ebmalloc is used in just two places:
1. For efi_memmap (via efi_arch_allocate_mmap_buffer())
2. For various cmdline options and module names (via aplace_string())
The second one is probably undesirable to allocate full page for each
one. On the other hand, the current approach (putting small allocations
at the same page as an earlier page-aligned one) also has its issues -
see comments on 3/3 patch.
> --- a/xen/Rules.mk
> +++ b/xen/Rules.mk
> @@ -262,7 +262,7 @@ quiet_cmd_obj_init_o = INIT_O $@
> define cmd_obj_init_o
> $(OBJDUMP) -h $< | while read idx name sz rest; do \
> case "$$name" in \
> - .*.local) ;; \
> + .*.local|.bss..brk*) ;; \
> .text|.text.*|.data|.data.*|.bss|.bss.*) \
> test $$(echo $$sz | sed 's,00*,0,') != 0 || continue; \
> echo "Error: size of $<:$$name is 0x$$sz" >&2; \
> --- a/xen/arch/x86/efi/efi-boot.h
> +++ b/xen/arch/x86/efi/efi-boot.h
> @@ -10,6 +10,7 @@
> #include <xen/vga.h>
>
> #include <asm/boot-helpers.h>
> +#include <asm/brk.h>
> #include <asm/e820.h>
> #include <asm/edd.h>
> #include <asm/microcode.h>
> @@ -119,6 +120,18 @@ static void __init relocate_trampoline(u
> reloc_trampoline64();
> }
>
> +DEFINE_BRK(efi, MB(1));
> +
> +static void *__init ebmalloc(size_t size)
> +{
> + void *ptr = brk_alloc(size);
> +
> + if ( !ptr )
> + blexit(L"Out of BRK memory\r\n");
> +
> + return ptr;
> +}
> +
> static void __init place_string(u32 *addr, const char *s)
> {
> char *alloc = NULL;
> --- a/xen/arch/x86/efi/stub.c
> +++ b/xen/arch/x86/efi/stub.c
> @@ -41,12 +41,4 @@ void __init noreturn efi_multiboot2(EFI_
>
> void __init efi_init_memory(void) { }
>
> -bool efi_boot_mem_unused(unsigned long *start, unsigned long *end)
> -{
> - /* FIXME: Simplify once the call here with two NULLs goes away. */
> - if ( start || end )
> - *start = *end = (unsigned long)_end;
> - return false;
> -}
> -
> void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t l4e) { }
> --- a/xen/arch/x86/include/asm/brk.h
> +++ b/xen/arch/x86/include/asm/brk.h
> @@ -2,6 +2,10 @@
>
> #include <xen/types.h>
>
> +#define DEFINE_BRK(var, size) \
> + static char __section(".bss..brk.page_aligned") __aligned(PAGE_SIZE) \
> + __used var ## _brk_[size]
This chunk belongs to the previous patch I think.
> +
> void *brk_alloc(size_t size);
> unsigned long brk_get_unused_start(void);
> void brk_free_unused(void);
--
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab
On 13.11.2025 13:40, Marek Marczykowski wrote: > On Thu, Nov 13, 2025 at 12:09:37PM +0100, Jan Beulich wrote: >> Use the new brk_alloc() instead, with ebmalloc() merely being a thin >> wrapper. >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> --- >> I'm not quite certain whether we ought to permit non-page-granular >> reservations. The in-memory image being somewhat larger due to possibly >> excessive padding isn't really a big problem, I think. > > My grep says ebmalloc is used in just two places: > 1. For efi_memmap (via efi_arch_allocate_mmap_buffer()) > 2. For various cmdline options and module names (via aplace_string()) > > The second one is probably undesirable to allocate full page for each > one. On the other hand, the current approach (putting small allocations > at the same page as an earlier page-aligned one) also has its issues - > see comments on 3/3 patch. Imo if such sharing of a page is unwanted, then it's the side caring about the non-sharing which ought to request an exact multiple of pages. Wasting space due to doing this in the BRK implementation is undesirable. Jan
On Thu, Nov 13, 2025 at 01:53:40PM +0100, Jan Beulich wrote: > On 13.11.2025 13:40, Marek Marczykowski wrote: > > On Thu, Nov 13, 2025 at 12:09:37PM +0100, Jan Beulich wrote: > >> Use the new brk_alloc() instead, with ebmalloc() merely being a thin > >> wrapper. > >> > >> Signed-off-by: Jan Beulich <jbeulich@suse.com> > >> --- > >> I'm not quite certain whether we ought to permit non-page-granular > >> reservations. The in-memory image being somewhat larger due to possibly > >> excessive padding isn't really a big problem, I think. > > > > My grep says ebmalloc is used in just two places: > > 1. For efi_memmap (via efi_arch_allocate_mmap_buffer()) > > 2. For various cmdline options and module names (via aplace_string()) > > > > The second one is probably undesirable to allocate full page for each > > one. On the other hand, the current approach (putting small allocations > > at the same page as an earlier page-aligned one) also has its issues - > > see comments on 3/3 patch. > > Imo if such sharing of a page is unwanted, then it's the side caring about > the non-sharing which ought to request an exact multiple of pages. Wasting > space due to doing this in the BRK implementation is undesirable. Makes sense. -- Best Regards, Marek Marczykowski-Górecki Invisible Things Lab
On 13.11.2025 13:40, Marek Marczykowski wrote:
> On Thu, Nov 13, 2025 at 12:09:37PM +0100, Jan Beulich wrote:
>> --- a/xen/arch/x86/include/asm/brk.h
>> +++ b/xen/arch/x86/include/asm/brk.h
>> @@ -2,6 +2,10 @@
>>
>> #include <xen/types.h>
>>
>> +#define DEFINE_BRK(var, size) \
>> + static char __section(".bss..brk.page_aligned") __aligned(PAGE_SIZE) \
>> + __used var ## _brk_[size]
>
> This chunk belongs to the previous patch I think.
It could, but it's not used there yet (i.e. would count as dead code).
Jan
On Thu, Nov 13, 2025 at 01:46:20PM +0100, Jan Beulich wrote:
> On 13.11.2025 13:40, Marek Marczykowski wrote:
> > On Thu, Nov 13, 2025 at 12:09:37PM +0100, Jan Beulich wrote:
> >> --- a/xen/arch/x86/include/asm/brk.h
> >> +++ b/xen/arch/x86/include/asm/brk.h
> >> @@ -2,6 +2,10 @@
> >>
> >> #include <xen/types.h>
> >>
> >> +#define DEFINE_BRK(var, size) \
> >> + static char __section(".bss..brk.page_aligned") __aligned(PAGE_SIZE) \
> >> + __used var ## _brk_[size]
> >
> > This chunk belongs to the previous patch I think.
>
> It could, but it's not used there yet (i.e. would count as dead code).
But in the current shape the linker script change in the first patch is
unused. IOW, I think adding .bss..brk.page_aligned to the linker script
should go together with DEFINE_BRK.
--
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab
On 13.11.2025 13:59, Marek Marczykowski wrote:
> On Thu, Nov 13, 2025 at 01:46:20PM +0100, Jan Beulich wrote:
>> On 13.11.2025 13:40, Marek Marczykowski wrote:
>>> On Thu, Nov 13, 2025 at 12:09:37PM +0100, Jan Beulich wrote:
>>>> --- a/xen/arch/x86/include/asm/brk.h
>>>> +++ b/xen/arch/x86/include/asm/brk.h
>>>> @@ -2,6 +2,10 @@
>>>>
>>>> #include <xen/types.h>
>>>>
>>>> +#define DEFINE_BRK(var, size) \
>>>> + static char __section(".bss..brk.page_aligned") __aligned(PAGE_SIZE) \
>>>> + __used var ## _brk_[size]
>>>
>>> This chunk belongs to the previous patch I think.
>>
>> It could, but it's not used there yet (i.e. would count as dead code).
>
> But in the current shape the linker script change in the first patch is
> unused. IOW, I think adding .bss..brk.page_aligned to the linker script
> should go together with DEFINE_BRK.
Again - could be done that way, but there's no concept of "dead code" in
the linker script (afaik at least).
Jan
On Thu, Nov 13, 2025 at 02:24:38PM +0100, Jan Beulich wrote:
> On 13.11.2025 13:59, Marek Marczykowski wrote:
> > On Thu, Nov 13, 2025 at 01:46:20PM +0100, Jan Beulich wrote:
> >> On 13.11.2025 13:40, Marek Marczykowski wrote:
> >>> On Thu, Nov 13, 2025 at 12:09:37PM +0100, Jan Beulich wrote:
> >>>> --- a/xen/arch/x86/include/asm/brk.h
> >>>> +++ b/xen/arch/x86/include/asm/brk.h
> >>>> @@ -2,6 +2,10 @@
> >>>>
> >>>> #include <xen/types.h>
> >>>>
> >>>> +#define DEFINE_BRK(var, size) \
> >>>> + static char __section(".bss..brk.page_aligned") __aligned(PAGE_SIZE) \
> >>>> + __used var ## _brk_[size]
> >>>
> >>> This chunk belongs to the previous patch I think.
> >>
> >> It could, but it's not used there yet (i.e. would count as dead code).
> >
> > But in the current shape the linker script change in the first patch is
> > unused. IOW, I think adding .bss..brk.page_aligned to the linker script
> > should go together with DEFINE_BRK.
>
> Again - could be done that way, but there's no concept of "dead code" in
> the linker script (afaik at least).
Anyway, I voiced my opinion, but it isn't important enough to block the
patch on it. Either way:
Reviewed-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
--
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab
© 2016 - 2025 Red Hat, Inc.